diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 493050618d..7c758a5081 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -240,6 +240,16 @@ functions and procedures + + pg_publication + publications for logical replication + + + + pg_publication_rel + relation to publication mapping + + pg_range information about range types @@ -285,6 +295,11 @@ planner statistics + + pg_subscription + logical replication subscriptions + + pg_tablespace tablespaces within this database cluster @@ -5271,6 +5286,137 @@ + + <structname>pg_publication</structname> + + + pg_publication + + + + The catalog pg_publication contains all + publications created in the database. For more on publications see + . + + + + <structname>pg_publication</structname> Columns + + + + + Name + Type + References + Description + + + + + + oid + oid + + Row identifier (hidden attribute; must be explicitly selected) + + + + pubname + Name + + Name of the publication + + + + pubowner + oid + pg_authid.oid + Owner of the publication + + + + puballtables + bool + + If true, this publication automatically includes all tables + in the database, including any that will be created in the future. + + + + + pubinsert + bool + + If true, INSERT operations are replicated for + tables in the publication. + + + + pubupdate + bool + + If true, UPDATE operations are replicated for + tables in the publication. + + + + pubdelete + bool + + If true, DELETE operations are replicated for + tables in the publication. + + + +
+
+ + + <structname>pg_publication_rel</structname> + + + pg_publication_rel + + + + The catalog pg_publication_rel contains the + mapping between relations and publications in the database. This is a + many-to-many mapping. See also + for a more user-friendly view of this information. + + + + <structname>pg_publication_rel</structname> Columns + + + + + Name + Type + References + Description + + + + + + prpubid + oid + pg_publication.oid + Reference to publication + + + + prrelid + oid + pg_class.oid + Reference to relation + + + +
+
+ <structname>pg_range</structname> @@ -6150,6 +6296,109 @@ + + <structname>pg_subscription</structname> + + + pg_subscription + + + + The catalog pg_subscription contains all existing + logical replication subscriptions. For more information about logical + replication see . + + + + Unlike most system catalogs, pg_subscription is + shared across all databases of a cluster: There is only one copy + of pg_subscription per cluster, not one per + database. + + + + Access to this catalog is restricted from normal users. Normal users can + use the view to get some information + about subscriptions. + + + + <structname>pg_subscription</structname> Columns + + + + + Name + Type + References + Description + + + + + + oid + oid + + Row identifier (hidden attribute; must be explicitly selected) + + + + subdbid + oid + pg_database.oid + OID of the database which the subscription resides in + + + + subname + name + + Name of the subscription + + + + subowner + oid + pg_authid.oid + Owner of the subscription + + + + subenabled + bool + + If true, the subscription is enabled and should be replicating. + + + + subconninfo + text + + Connection string to the upstream database + + + + subslotname + name + + Name of the replication slot in the upstream database. Also used + for local replication origin name. + + + + subpublications + text[] + + Array of subscribed publication names. These reference the + publications on the publisher server. For more on publications + see . + + + + +
+
<structname>pg_tablespace</structname> @@ -7589,6 +7838,11 @@ prepared transactions
+ + pg_publication_tables + publications and their associated tables + + pg_replication_origin_status information about replication origins, including replication progress @@ -8871,6 +9125,61 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx + + <structname>pg_publication_tables</structname> + + + pg_publication_tables + + + + The view pg_publication_tables provides + information about the mapping between publications and the tables they + contain. Unlike the underlying + catalog pg_publication_rel, this view expands + publications defined as FOR ALL TABLES, so for such + publications there will be a row for each eligible table. + + + + <structname>pg_publication_tables</structname> Columns + + + + + Name + Type + References + Description + + + + + + pubname + name + pg_publication.pubname + Name of publication + + + + schemaname + name + pg_namespace.nspname + Name of schema containing table + + + + tablename + name + pg_class.relname + Name of table + + + +
+
+ <structname>pg_replication_origin_status</structname> diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 07afa3c77a..fb5d6473ef 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3411,6 +3411,47 @@ ANY num_sync ( + Subscribers + + + These settings control the behavior of a logical replication subscriber. + Their values on the publisher are irrelevant. + + + + Note that wal_receiver_timeout and + wal_retrieve_retry_interval configuration parameters + affect the logical replication workers as well. + + + + + + max_logical_replication_workers (int) + + max_logical_replication_workers configuration parameter + + + + + Specifies maximum number of logical replication workers. This includes + both apply workers and table synchronization workers. + + + Logical replication workers are taken from the pool defined by + max_worker_processes. + + + The default value is 4. + + + + + + + diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index 69649a7da4..2624c627dc 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -50,6 +50,7 @@ + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 2504a466e6..b214218791 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -18762,7 +18762,7 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
- + pg_replication_origin_advance diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml new file mode 100644 index 0000000000..9312c0c9a0 --- /dev/null +++ b/doc/src/sgml/logical-replication.sgml @@ -0,0 +1,396 @@ + + + + Logical Replication + + + Logical replication is a method of replicating data objects and their + changes, based upon their replication identity (usually a primary key). We + use the term logical in contrast to physical replication, which uses exact + block addresses and byte-by-byte replication. PostgreSQL supports both + mechanisms concurrently, see . Logical + replication allows fine-grained control over both data replication and + security. + + + + Logical replication uses a publish + and subscribe model with one or + more subscribers subscribing to one or more + publications on a publisher + node. Subscribers pull data from the publications they subscribe to and may + subsequently re-publish data to allow cascading replication or more complex + configurations. + + + + Logical replication sends the changes on the publisher to the subscriber as + they occur in real-time. The subscriber applies the data in the same order + as the publisher so that transactional consistency is guaranteed for + publications within a single subscription. This method of data replication + is sometimes referred to as transactional replication. + + + + The typical use-cases for logical replication are: + + + + + Sending incremental changes in a single database or a subset of a + database to subscribers as they occur. + + + + + + Firing triggers for individual changes as they are incoming to + subscriber. + + + + + + Consolidating multiple databases into a single one (for example for + analytical purposes). + + + + + + Replicating between different major versions of PostgreSQL. + + + + + + Giving access to replicated data to different groups of users. + + + + + + Sharing a subset of the database between multiple databases. 
+ + + + + + + The subscriber database behaves in the same way as any other PostgreSQL + instance and can be used as a publisher for other databases by defining its + own publications. When the subscriber is treated as read-only by + application, there will be no conflicts from a single subscription. On the + other hand, if there are other writes done either by application or other + subscribers to the same set of tables conflicts can arise. + + + + Publication + + + A publication object can be defined on any physical + replication master. The node where a publication is defined is referred to + as publisher. A publication is a set of changes + generated from a group of tables, and might also be described as a change + set or replication set. Each publication exists in only one database. + + + + Publications are different from schemas and do not affect how the table is + accessed. Each table can be added to multiple publications if needed. + Publications may currently only contain tables. Objects must be added + explicitly, except when a publication is created for ALL + TABLES. + + + + Publications can choose to limit the changes they produce to show + any combination of INSERT, UPDATE, and + DELETE in a similar way to the way triggers are fired by + particular event types. If a table without a REPLICA + IDENTITY is added to a publication that + replicates UPDATE or DELETE + operations then subsequent UPDATE + or DELETE operations will fail on the publisher. + + + + Every publication can have multiple subscribers. + + + + A publication is created using the + command and may be later altered or dropped using corresponding commands. + + + + The individual tables can be added and removed dynamically using + . Both the ADD + TABLE and DROP TABLE operations are + transactional; so the table will start or stop replicating at the correct + snapshot once the transaction has committed. 
+ + + + + Subscription + + + A subscription is the downstream side of logical + replication. The node where a subscription is defined is referred to as + the subscriber. A subscription defines the connection + to another database and the set of publications (one or more) to which it wants + to be subscribed. + + + + The subscriber database behaves in the same way as any other PostgreSQL + instance and can be used as a publisher for other databases by defining its + own publications. + + + + A subscriber node may have multiple subscriptions if desired. It is + possible to define multiple subscriptions between a single + publisher-subscriber pair, in which case extra care must be taken to ensure + that the subscribed publication objects don't overlap. + + + + Each subscription will receive changes via one replication slot (see + ). + + + + Subscriptions are not dumped by pg_dump by default but + can be requested using the command-line + option . + + + + The subscription is added using and + can be stopped/resumed at any time using the + command and removed using + . + + + + When a subscription is dropped and recreated, the synchronization + information is lost. This means that the data has to be resynchronized + afterwards. + + + + The schema definitions are not replicated and the published tables must + exist on the subscriber for replication to work. Only regular tables may be + the target of replication. For example, you can't replicate to a view. + + + + The tables are matched between the publisher and the subscriber using the + fully qualified table name. Replication to differently-named tables on the + subscriber is not supported. + + + + Columns of a table are also matched by name. A different order of columns + in the target table is allowed, but the column types have to match. + + + + + Conflicts + + + The logical replication behaves similarly to normal DML operations in that + the data will be updated even if it was changed locally on the subscriber + node. 
If the incoming data violates any constraints the replication will + stop. This is referred to as a conflict. When + replicating UPDATE or DELETE + operations, missing data will not produce a conflict and such operations + will simply be skipped. + + + + A conflict will produce an error and will stop the replication; it must be + resolved manually by the user. Details about the conflict can be found in + the subscriber's server log. + + + + The resolution can be done either by changing data on the subscriber so + that it does not conflict with the incoming change or by skipping the + transaction that conflicts with the existing data. The transaction can be + skipped by calling the + pg_replication_origin_advance() function with + a node_name corresponding to the subscription name. + The current position of origins can be seen in the + + pg_replication_origin_status system view. + + + + + Architecture + + + Logical replication starts by copying a snapshot of the data on the + publisher database. Once that is done, changes on the publisher are sent + to the subscriber as they occur in real time. The subscriber applies data + in the order in which commits were made on the publisher so that + transactional consistency is guaranteed for the publications within any + single subscription. + + + + Logical replication is built with an architecture similar to physical + streaming replication (see ). It is + implemented by walsender and the apply + processes. The walsender starts logical decoding (described + in ) of the WAL and loads the standard + logical decoding plugin (pgoutput). The plugin transforms the changes read + from WAL to the logical replication protocol + (see ) and filters the data + according to the publication specification. The data is then continuously + transferred using the streaming replication protocol to the apply worker, + which maps the data to local tables and applies the individual changes as + they are received in exact transactional order. 
+ + + + The apply process on the subscriber database always runs with + session_replication_role set + to replica, which produces the usual effects on triggers + and constraints. + + + + + Monitoring + + + Because logical replication is based on similar architecture as + physical streaming replication + the monitoring on a publication node is very similar to monitoring of + physical replication master + (see ). + + + + The monitoring information about subscription is visible in + pg_stat_subscription. + This view contains one row for every subscription worker. A subscription + can have zero or more active subscription workers depending on its state. + + + + Normally, there is a single apply process running for an enabled + subscription. A disabled subscription or a crashed subscription will have + zero rows in this view. + + + + + Security + + + Logical replication connections occur in the same way as physical streaming + replication. It requires access to be specifically given using + pg_hba.conf. The role used for the replication + connection must have the REPLICATION attribute. This + gives a role access to both logical and physical replication. + + + + To create a publication, the user must have the CREATE + privilege in the database. + + + + To create a subscription, the user must be a superuser. + + + + The subscription apply process will run in the local database with the + privileges of a superuser. + + + + Privileges are only checked once at the start of a replication connection. + They are not re-checked as each change record is read from the publisher, + nor are they re-checked for each change when applied. + + + + + Configuration Settings + + + Logical replication requires several configuration options to be set. + + + + On the publisher side, wal_level must be set to + logical, and max_replication_slots + has to be set to at least the number of subscriptions expected to connect. 
+ And max_wal_senders should be set to at least the same + as max_replication_slots plus the number of physical replicas + that are connected at the same time. + + + + The subscriber also requires the max_replication_slots + to be set. In this case it should be set to at least the number of + subscriptions that will be added to the subscriber. + max_logical_replication_workers has to be set to at + least the number of subscriptions. Additionally the + max_worker_processes may need to be adjusted to + accommodate for replication workers, at least + (max_logical_replication_workers + + 1). Note that some extensions and parallel queries + also take worker slots from max_worker_processes. + + + + + Quick Setup + + + First set the configuration options in postgresql.conf: + +wal_level = logical + + The other required settings have default values that are sufficient for a + basic setup. + + + + pg_hba.conf needs to be adjusted to allow replication + (the values here depend on your actual network configuration and user you + want to use for connecting): + +host replication repuser 0.0.0.0/0 md5 + + + + + Then on the publisher database: + +CREATE PUBLICATION mypub FOR TABLE users, departments; + + + + + And on the subscriber database: + +CREATE SUBSCRIPTION mysub CONNECTION 'dbname=foo host=bar user=repuser' PUBLICATION mypub; + + + + + The above will start the replication process of changes to + users and departments tables. + + + diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 1545f03656..01fad3870f 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -308,6 +308,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_subscriptionpg_stat_subscription + At least one row per subscription, showing information about + the subscription workers. + See for details. 
+ + + pg_stat_sslpg_stat_ssl One row per connection (regular and replication), showing information about @@ -1545,6 +1553,72 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i connected server. + + <structname>pg_stat_subscription</structname> View + + + + Column + Type + Description + + + + + + subid + oid + OID of the subscription + + + subname + text + Name of the subscription + + + pid + integer + Process ID of the subscription worker process + + + received_lsn + pg_lsn + Last transaction log position received, the initial value of + this field being 0 + + + last_msg_send_time + timestamp with time zone + Send time of last message received from origin WAL sender + + + last_msg_receipt_time + timestamp with time zone + Receipt time of last message received from origin WAL sender + + + + latest_end_lsn + pg_lsn + Last transaction log position reported to origin WAL sender + + + + latest_end_time + timestamp with time zone + Time of last transaction log position reported to origin WAL + sender + + + +
+ + + The pg_stat_subscription view will contain one + row per subscription for the main worker (with null PID if the worker is + not running). + + <structname>pg_stat_ssl</structname> View diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml index 9143917c49..4e169d1b18 100644 --- a/doc/src/sgml/postgres.sgml +++ b/doc/src/sgml/postgres.sgml @@ -160,6 +160,7 @@ &monitoring; &diskusage; &wal; + &logical-replication; &regress; diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 9ba147cae5..5f89db5857 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2122,6 +2122,119 @@ The commands accepted in walsender mode are: + + Logical Streaming Replication Protocol + + + This section describes the logical replication protocol, which is the message + flow started by the START_REPLICATION + SLOT slot_name + LOGICAL replication command. + + + + The logical streaming replication protocol builds on the primitives of + the physical streaming replication protocol. + + + + Logical Streaming Replication Parameters + + + The logical replication START_REPLICATION command + accepts the following parameters: + + + + + proto_version + + + + Protocol version. Currently only version 1 is + supported. + + + + + + + publication_names + + + + Comma separated list of publication names for which to subscribe + (receive changes). The individual publication names are treated + as standard object names and can be quoted the same way as needed. + + + + + + + + + + Logical Replication Protocol Messages + + + The individual protocol messages are discussed in the following + subsections. Individual messages are described in + section. + + + + All top-level protocol messages begin with a message type byte. + While represented in code as a character, this is a signed byte with no + associated encoding. 
+ + + + Since the streaming replication protocol supplies a message length there + is no need for top-level protocol messages to embed a length in their + header. + + + + + + Logical Replication Protocol Message Flow + + + With the exception of the START_REPLICATION command and + the replay progress messages, all information flows only from the backend + to the frontend. + + + + The logical replication protocol sends individual transactions one by one. + This means that all messages between a pair of Begin and Commit messages + belong to the same transaction. + + + + Every sent transaction contains zero or more DML messages (Insert, + Update, Delete). In case of a cascaded setup it can also contain Origin + messages. The Origin message indicates that the transaction originated on + a different replication node. Since a replication node in the scope of the logical + replication protocol can be pretty much anything, the only identifier + is the origin name. It is the downstream's responsibility to handle this as + needed (if needed). The Origin message is always sent before any DML + messages in the transaction. + + + + Every DML message contains an arbitrary relation ID, which can be mapped to + an ID in the Relation messages. The Relation messages describe the schema of the + given relation. The Relation message is sent for a given relation either + because it is the first time we send a DML message for the given relation in the + current session or because the relation definition has changed since the + last Relation message was sent for it. The protocol assumes that the client + is capable of caching the metadata for as many relations as needed. + + + + Message Data Types @@ -5149,6 +5262,614 @@ not line breaks. + +Logical Replication Message Formats + + +This section describes the detailed format of each logical replication message. +These messages are returned either by the replication slot SQL interface or are +sent by a walsender. 
In case of a walsender they are encapsulated inside the replication +protocol WAL messages as described in +and generally obey same message flow as physical replication. + + + + + + +Begin + + + + + + + + Byte1('B') + + + + Identifies the message as a begin message. + + + + + + Int64 + + + + The final LSN of the transaction. + + + + + + Int64 + + + + Commit timestamp of the transaction. The value is in number + of microseconds since PostgreSQL epoch (2000-01-01). + + + + + + Int32 + + + + Xid of the transaction. + + + + + + + + + + + +Commit + + + + + + + + Byte1('C') + + + + Identifies the message as a commit message. + + + + + + Int64 + + + + The LSN of the commit. + + + + + + Int64 + + + + The end LSN of the transaction. + + + + + + Int64 + + + + Commit timestamp of the transaction. The value is in number + of microseconds since PostgreSQL epoch (2000-01-01). + + + + + + + + + + + +Origin + + + + + + + + Byte1('O') + + + + Identifies the message as an origin message. + + + + + + Int64 + + + + The LSN of the commit on the origin server. + + + + + + String + + + + Name of the origin. + + + + + + + + + Note that there can be multiple Origin messages inside a single transaction. + + + + + + + +Relation + + + + + + + + Byte1('R') + + + + Identifies the message as a relation message. + + + + + + Int32 + + + + ID of the relation. + + + + + + String + + + + Namespace (empty string for pg_catalog). + + + + + + String + + + + Relation name. + + + + + + + Int8 + + + + Replica identity setting for the relation (same as + relreplident in pg_class). + + + + + + + Int16 + + + + Number of columns. + + + + + Next, the following message part appears for each column: + + + + Int8 + + + + Flags for the column. Currently can be either 0 for no flags + or 1 which marks the column as part of the key. + + + + + + String + + + + Name of the column. + + + + + + + + + + + +Insert + + + + + + + + Byte1('I') + + + + Identifies the message as an insert message. 
+ + + + + + Int32 + + + + ID of the relation corresponding to the ID in the relation + message. + + + + + + Byte1('N') + + + + Identifies the following TupleData message as a new tuple. + + + + + + + TupleData + + + + TupleData message part representing the contents of new tuple. + + + + + + + + + + + +Update + + + + + + + + Byte1('U') + + + + Identifies the message as an update message. + + + + + + Int32 + + + + ID of the relation corresponding to the ID in the relation + message. + + + + + + + Byte1('K') + + + + Identifies the following TupleData submessage as a key. + This field is optional and is only present if + the update changed data in any of the column(s) that are + part of the REPLICA IDENTITY index. + + + + + + + Byte1('O') + + + + Identifies the following TupleData submessage as an old tuple. + This field is optional and is only present if table in which + the update happened has REPLICA IDENTITY set to FULL. + + + + + + + TupleData + + + + TupleData message part representing the contents of the old tuple + or primary key. Only present if the previous 'O' or 'K' part + is present. + + + + + + + Byte1('N') + + + + Identifies the following TupleData message as a new tuple. + + + + + + + TupleData + + + + TupleData message part representing the contents of a new tuple. + + + + + + + + + The Update message may contain either a 'K' message part or an 'O' message part + or neither of them, but never both of them. + + + + + + + +Delete + + + + + + + + Byte1('D') + + + + Identifies the message as a delete message. + + + + + + Int32 + + + + ID of the relation corresponding to the ID in the relation + message. + + + + + + + Byte1('K') + + + + Identifies the following TupleData submessage as a key. + This field is present if the table in which the delete has + happened uses an index as REPLICA IDENTITY. + + + + + + + Byte1('O') + + + + Identifies the following TupleData message as a old tuple. 
+ This field is present if the table in which the delete has + happened has REPLICA IDENTITY set to FULL. + + + + + + + TupleData + + + + TupleData message part representing the contents of the old tuple + or primary key, depending on the previous field. + + + + + + + + The Delete message may contain either a 'K' message part or an 'O' message part, + but never both of them. + + + + + + + + + +The following message parts are shared by the above messages. + + + + + + + +TupleData + + + + + + + + Int16 + + + + Number of columns. + + + + + Next, one of the following submessages appears for each column: + + + + Byte1('n') + + + + Identifies the data as a NULL value. + + + + + Or + + + + Byte1('u') + + + + Identifies an unchanged TOASTed value (the actual value is not + sent). + + + + + Or + + + + Byte1('t') + + + + Identifies the data as a text-formatted value. + + + + + + Int32 + + + + Length of the column value. + + + + + + String + + + + The text value. + + + + + + + + + + + + + Summary of Changes since Protocol 2.0 diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 77667bdebd..0d09f81ccc 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -26,11 +26,13 @@ Complete list of usable sgml source files in this directory. + + @@ -72,11 +74,13 @@ Complete list of usable sgml source files in this directory. + + @@ -116,11 +120,13 @@ Complete list of usable sgml source files in this directory. + + diff --git a/doc/src/sgml/ref/alter_publication.sgml b/doc/src/sgml/ref/alter_publication.sgml new file mode 100644 index 0000000000..47d83b80be --- /dev/null +++ b/doc/src/sgml/ref/alter_publication.sgml @@ -0,0 +1,139 @@ + + + + + ALTER PUBLICATION + + + + ALTER PUBLICATION + 7 + SQL - Language Statements + + + + ALTER PUBLICATION + change the definition of a publication + + + + +ALTER PUBLICATION name WITH ( option [, ... 
] ) + +where option can be: + + PUBLISH INSERT | NOPUBLISH INSERT + | PUBLISH UPDATE | NOPUBLISH UPDATE + | PUBLISH DELETE | NOPUBLISH DELETE + +ALTER PUBLICATION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER PUBLICATION name ADD TABLE table_name [, ...] +ALTER PUBLICATION name SET TABLE table_name [, ...] +ALTER PUBLICATION name DROP TABLE table_name [, ...] + + + + + Description + + + The first variant of this command listed in the synopsis can change + all of the publication properties specified in + . Properties not mentioned in the + command retain their previous settings. Database superusers can change any + of these settings for any role. + + + + To alter the owner, you must also be a direct or indirect member of the + new owning role. The new owner has to be a superuser. + + + + The other variants of this command deal with the table membership of the + publication. The SET TABLE clause will replace the + list of tables in the publication with the specified one. + The ADD TABLE and + DROP TABLE clauses will add and remove one or more tables from + the publication. + + + + + Parameters + + + + name + + + The name of an existing publication whose definition is to be altered. + + + + + + PUBLISH INSERT + NOPUBLISH INSERT + PUBLISH UPDATE + NOPUBLISH UPDATE + PUBLISH DELETE + NOPUBLISH DELETE + + + These clauses alter properties originally set by + . See there for more information. + + + + + + table_name + + + Name of an existing table. + + + + + + + + Examples + + + Change the publication to not publish inserts: + +ALTER PUBLICATION noinsert WITH (NOPUBLISH INSERT); + + + + + Add some tables to the publication: + +ALTER PUBLICATION mypublication ADD TABLE users, departments; + + + + + + Compatibility + + + ALTER PUBLICATION is a PostgreSQL + extension. 
+ + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/alter_subscription.sgml b/doc/src/sgml/ref/alter_subscription.sgml new file mode 100644 index 0000000000..032ecbb885 --- /dev/null +++ b/doc/src/sgml/ref/alter_subscription.sgml @@ -0,0 +1,139 @@ + + + + + ALTER SUBSCRIPTION + + + + ALTER SUBSCRIPTION + 7 + SQL - Language Statements + + + + ALTER SUBSCRIPTION + change the definition of a subscription + + + + +ALTER SUBSCRIPTION name WITH ( option [, ... ] ) + +where option can be: + + SLOT NAME = slot_name + +ALTER SUBSCRIPTION name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER SUBSCRIPTION name CONNECTION 'conninfo' +ALTER SUBSCRIPTION name SET PUBLICATION publication_name [, ...] +ALTER SUBSCRIPTION name ENABLE +ALTER SUBSCRIPTION name DISABLE + + + + + Description + + + ALTER SUBSCRIPTION can change most of the subscription + properties that can be specified + in . + + + + To alter the owner, you must also be a direct or indirect member of the + new owning role. The new owner has to be a superuser. + + + + + Parameters + + + + name + + + The name of a subscription whose properties are to be altered. + + + + + + CONNECTION 'conninfo' + SET PUBLICATION publication_name + SLOT NAME = slot_name + + + These clauses alter properties originally set by + . See there for more + information. + + + + + + ENABLE + + + Enables the previously disabled subscription, starting the logical + replication worker at the end of the transaction. + + + + + + DISABLE + + + Disables the running subscription, stopping the logical replication + worker at the end of the transaction. + + + + + + + + Examples + + + Change the publication subscribed by a subscription to + insert_only: + +ALTER SUBSCRIPTION mysub SET PUBLICATION insert_only; + + + + + Disable (stop) the subscription: + +ALTER SUBSCRIPTION mysub DISABLE; + + + + + + Compatibility + + + ALTER SUBSCRIPTION is a PostgreSQL + extension. 
+ + + + + See Also + + + + + + + + + diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml new file mode 100644 index 0000000000..995f2bcf3c --- /dev/null +++ b/doc/src/sgml/ref/create_publication.sgml @@ -0,0 +1,206 @@ + + + + + CREATE PUBLICATION + + + + CREATE PUBLICATION + 7 + SQL - Language Statements + + + + CREATE PUBLICATION + define a new publication + + + + +CREATE PUBLICATION name + [ FOR TABLE table_name [, ...] + | FOR ALL TABLES ] + [ WITH ( option [, ... ] ) ] + +where option can be: + + PUBLISH INSERT | NOPUBLISH INSERT + | PUBLISH UPDATE | NOPUBLISH UPDATE + | PUBLISH DELETE | NOPUBLISH DELETE + + + + + Description + + + CREATE PUBLICATION adds a new publication + into the current database. The publication name must be distinct from + the name of any existing publication in the current database. + + + + A publication is essentially a group of tables whose data changes are + intended to be replicated through logical replication. See + for details about how + publications fit into the logical replication setup. + + + + + Parameters + + + + name + + + The name of the new publication. + + + + + + FOR TABLE + + + Specifies a list of tables to add to the publication. + + + + + + FOR ALL TABLES + + + Marks the publication as one that replicates changes for all tables in + the database, including tables created in the future. + + + + + + PUBLISH INSERT + NOPUBLISH INSERT + + + These clauses determine whether the new publication will send + the INSERT operations to the subscribers. + PUBLISH INSERT is the default. + + + + + + PUBLISH UPDATE + NOPUBLISH UPDATE + + + These clauses determine whether the new publication will send + the UPDATE operations to the subscribers. + PUBLISH UPDATE is the default. + + + + + + PUBLISH DELETE + NOPUBLISH DELETE + + + These clauses determine whether the new publication will send + the DELETE operations to the subscribers. + PUBLISH DELETE is the default. 
+ + + + + + + + + Notes + + + If neither FOR TABLE nor FOR ALL + TABLES is specified, then the publication starts out with an + empty set of tables. That is useful if tables are to be added later. + + + + The creation of a publication does not start replication. It only defines + a grouping and filtering logic for future subscribers. + + + + To create a publication, the invoking user must have the + CREATE privilege for the current database. + (Of course, superusers bypass this check.) + + + + To add a table to a publication, the invoking user must have + SELECT privilege on given table. The + FOR ALL TABLES clause requires superuser. + + + + The tables added to a publication that publishes UPDATE + and/or DELETE operations must have + REPLICA IDENTITY defined. Otherwise those operations will be + disallowed on those tables. + + + + For an INSERT ... ON CONFLICT command, the publication will + publish the operation that actually results from the command. So depending + of the outcome, it may be published as either INSERT or + UPDATE, or it may not be published at all. + + + + TRUNCATE and other DDL operations + are not published. + + + + + Examples + + + Create a simple publication that just publishes all DML for tables in it: + +CREATE PUBLICATION mypublication; + + + + + Create an insert-only publication: + +CREATE PUBLICATION insert_only WITH (NOPUBLISH UPDATE, NOPUBLISH DELETE); + + + + + + Compatibility + + + CREATE PUBLICATION is a PostgreSQL + extension. 
+ + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/create_subscription.sgml b/doc/src/sgml/ref/create_subscription.sgml new file mode 100644 index 0000000000..40d08b3440 --- /dev/null +++ b/doc/src/sgml/ref/create_subscription.sgml @@ -0,0 +1,176 @@ + + + + + CREATE SUBSCRIPTION + + + + CREATE SUBSCRIPTION + 7 + SQL - Language Statements + + + + CREATE SUBSCRIPTION + define a new subscription + + + + +CREATE SUBSCRIPTION subscription_name CONNECTION 'conninfo' PUBLICATION { publication_name [, ...] } [ WITH ( option [, ... ] ) ] + +where option can be: + + ENABLED | DISABLED + | CREATE SLOT | NOCREATE SLOT + | SLOT NAME = slot_name + + + + + Description + + + CREATE SUBSCRIPTION adds a new subscription for the + current database. The subscription name must be distinct from the name of + any existing subscription in the database. + + + + The subscription represents a replication connection to the publisher. As + such this command does not only add definitions in the local catalogs but + also creates a replication slot on the publisher. + + + + A logical replication worker will be started to replicate data for the new + subscription at the commit of the transaction where this command is run. + + + + Additional info about subscriptions and logical replication as a whole + is available at and + . + + + + + + Parameters + + + + subscription_name + + + The name of the new subscription. + + + + + + CONNECTION 'conninfo' + + + The connection string to the publisher. + + + + + + PUBLICATION publication_name + + + Name(s) of the publications on the publisher to subscribe to. + + + + + + ENABLED + DISABLED + + + Specifies whether the subscription should be actively replicating or + if it should be just set up but not started yet. Note that the + replication slot as described above is created in either case. + ENABLED is the default. + + + + + + CREATE SLOT + NOCREATE SLOT + + + Specifies whether the command should create the replication slot on the + publisher. 
CREATE SLOT is the default. + + + + + + SLOT NAME = slot_name + + + Name of the replication slot to use. The default behavior is to use + subscription_name for the slot name. + + + + + + + + Examples + + + Create a subscription to a remote server that replicates tables in + the publications mypublication and + insert_only and starts replicating immediately on + commit: + +CREATE SUBSCRIPTION mysub + CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass' + PUBLICATION mypublication, insert_only; + + + + + Create a subscription to a remote server that replicates tables in + the insert_only publication and does not start replicating + until enabled at a later time: + +CREATE SUBSCRIPTION mysub + CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb password=foopass' + PUBLICATION insert_only + WITH (DISABLED); + + + + + + Compatibility + + + CREATE SUBSCRIPTION is a PostgreSQL + extension. + + + + + See Also + + + + + + + + + diff --git a/doc/src/sgml/ref/drop_publication.sgml b/doc/src/sgml/ref/drop_publication.sgml new file mode 100644 index 0000000000..1a1be579ad --- /dev/null +++ b/doc/src/sgml/ref/drop_publication.sgml @@ -0,0 +1,107 @@ + + + + + DROP PUBLICATION + + + + DROP PUBLICATION + 7 + SQL - Language Statements + + + + DROP PUBLICATION + remove a publication + + + + +DROP PUBLICATION [ IF EXISTS ] name [, ...] [ CASCADE | RESTRICT ] + + + + + Description + + + DROP PUBLICATION removes an existing publication from + the database. + + + + A publication can only be dropped by its owner or a superuser. + + + + + Parameters + + + + IF EXISTS + + + Do not throw an error if the publication does not exist. A notice is issued + in this case. + + + + + + name + + + The name of an existing publication. + + + + + + CASCADE + RESTRICT + + + + These key words do not have any effect, since there are no dependencies + on publications. 
+ + + + + + + + Examples + + + Drop a publication: + +DROP PUBLICATION mypublication; + + + + + + + Compatibility + + + DROP PUBLICATION is a PostgreSQL + extension. + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/drop_subscription.sgml b/doc/src/sgml/ref/drop_subscription.sgml new file mode 100644 index 0000000000..9f2fb93275 --- /dev/null +++ b/doc/src/sgml/ref/drop_subscription.sgml @@ -0,0 +1,110 @@ + + + + + DROP SUBSCRIPTION + + + + DROP SUBSCRIPTION + 7 + SQL - Language Statements + + + + DROP SUBSCRIPTION + remove a subscription + + + + +DROP SUBSCRIPTION [ IF EXISTS ] name [ DROP SLOT | NODROP SLOT ] + + + + + Description + + + DROP SUBSCRIPTION removes a subscription from the + database cluster. + + + + A subscription can only be dropped by a superuser. + + + + The replication worker associated with the subscription will not stop until + after the transaction that issued this command has committed. + + + + + Parameters + + + + name + + + The name of a subscription to be dropped. + + + + + + DROP SLOT + NODROP SLOT + + + Specifies whether to drop the replication slot on the publisher. The + default is + DROP SLOT. + + + + If the publisher is not reachable when the subscription is to be + dropped, then it is useful to specify NODROP SLOT. + But the replication slot on the publisher will then have to be removed + manually. + + + + + + + + + Examples + + + Drop a subscription: + +DROP SUBSCRIPTION mysub; + + + + + + + Compatibility + + + DROP SUBSCRIPTION is a PostgreSQL + extension. + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index b70e7d57e9..a1e03c481d 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -755,6 +755,15 @@ PostgreSQL documentation + + + + + Include logical replication subscriptions in the dump. 
+ + + + @@ -789,6 +798,18 @@ PostgreSQL documentation + + + + + When dumping logical replication subscriptions, + generate CREATE SUBSCRIPTION commands that do not + create the remote replication slot. That way, the dump can be + restored without requiring network access to the remote servers. + + + + diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml index 991573121b..640fe12bbf 100644 --- a/doc/src/sgml/ref/psql-ref.sgml +++ b/doc/src/sgml/ref/psql-ref.sgml @@ -1600,6 +1600,34 @@ testdb=> + + \dRp[+] [ pattern ] + + + Lists replication publications. + If pattern is + specified, only those publications whose names match the pattern are + listed. + If + is appended to the command name, the tables + associated with each publication are shown as well. + + + + + + \dRs[+] [ pattern ] + + + Lists replication subscriptions. + If pattern is + specified, only those subscriptions whose names match the pattern are + listed. + If + is appended to the command name, additional + properties of the subscriptions are shown. 
+ + + + \dT[S+] [ pattern ] diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index 8acdff1393..34007d3508 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -54,11 +54,13 @@ &alterOperatorClass; &alterOperatorFamily; &alterPolicy; + &alterPublication; &alterRole; &alterRule; &alterSchema; &alterSequence; &alterServer; + &alterSubscription; &alterSystem; &alterTable; &alterTableSpace; @@ -100,11 +102,13 @@ &createOperatorClass; &createOperatorFamily; &createPolicy; + &createPublication; &createRole; &createRule; &createSchema; &createSequence; &createServer; + &createSubscription; &createTable; &createTableAs; &createTableSpace; @@ -144,11 +148,13 @@ &dropOperatorFamily; &dropOwned; &dropPolicy; + &dropPublication; &dropRole; &dropRule; &dropSchema; &dropSequence; &dropServer; + &dropSubscription; &dropTable; &dropTableSpace; &dropTSConfig; diff --git a/src/Makefile b/src/Makefile index 977f80b469..b490c07138 100644 --- a/src/Makefile +++ b/src/Makefile @@ -22,6 +22,7 @@ SUBDIRS = \ include \ interfaces \ backend/replication/libpqwalreceiver \ + backend/replication/pgoutput \ fe_utils \ bin \ pl \ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index f5346f024e..f6f136da3a 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -42,6 +42,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "replication/logical.h" +#include "replication/logicallauncher.h" #include "replication/origin.h" #include "replication/syncrep.h" #include "replication/walsender.h" @@ -2135,6 +2136,7 @@ CommitTransaction(void) AtEOXact_HashTables(true); AtEOXact_PgStat(true); AtEOXact_Snapshot(true); + AtCommit_ApplyLauncher(); pgstat_report_xact_timestamp(0); CurrentResourceOwner = NULL; diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index cd38c8ab3f..31368585d2 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -14,8 
+14,9 @@ OBJS = catalog.o dependency.o heap.o index.o indexing.o namespace.o aclchk.o \ objectaccess.o objectaddress.o partition.o pg_aggregate.o pg_collation.o \ pg_constraint.o pg_conversion.o \ pg_depend.o pg_enum.o pg_inherits.o pg_largeobject.o pg_namespace.o \ - pg_operator.o pg_proc.o pg_range.o pg_db_role_setting.o pg_shdepend.o \ - pg_type.o storage.o toasting.o + pg_operator.o pg_proc.o pg_publication.o pg_range.o \ + pg_db_role_setting.o pg_shdepend.o pg_subscription.o pg_type.o \ + storage.o toasting.o BKIFILES = postgres.bki postgres.description postgres.shdescription @@ -42,7 +43,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_foreign_table.h pg_policy.h pg_replication_origin.h \ pg_default_acl.h pg_init_privs.h pg_seclabel.h pg_shseclabel.h \ pg_collation.h pg_partitioned_table.h pg_range.h pg_transform.h \ - pg_sequence.h \ + pg_sequence.h pg_publication.h pg_publication_rel.h pg_subscription.h \ toasting.h indexing.h \ ) diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 640632784c..a96bf692df 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -45,6 +45,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/pg_ts_config.h" @@ -3390,6 +3391,10 @@ static const char *const not_owner_msg[MAX_ACL_KIND] = gettext_noop("must be owner of event trigger %s"), /* ACL_KIND_EXTENSION */ gettext_noop("must be owner of extension %s"), + /* ACL_KIND_PUBLICATION */ + gettext_noop("must be owner of publication %s"), + /* ACL_KIND_SUBSCRIPTION */ + gettext_noop("must be owner of subscription %s"), }; @@ -5071,6 +5076,58 @@ pg_extension_ownercheck(Oid ext_oid, Oid roleid) return has_privs_of_role(roleid, ownerId); } +/* + * Ownership check for an publication (specified by OID). 
+ */ +bool +pg_publication_ownercheck(Oid pub_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pub_oid)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication with OID %u does not exist", pub_oid))); + + ownerId = ((Form_pg_publication) GETSTRUCT(tuple))->pubowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + +/* + * Ownership check for an subscription (specified by OID). + */ +bool +pg_subscription_ownercheck(Oid sub_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(sub_oid)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription with OID %u does not exist", sub_oid))); + + ownerId = ((Form_pg_subscription) GETSTRUCT(tuple))->subowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + /* * Check whether specified role has CREATEROLE privilege (or is a superuser) * diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index c3b4d298ce..11ee536726 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -36,6 +36,7 @@ #include "catalog/pg_shdepend.h" #include "catalog/pg_shdescription.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/toasting.h" #include "miscadmin.h" @@ -227,7 +228,8 @@ IsSharedRelation(Oid relationId) relationId == SharedSecLabelRelationId || relationId == TableSpaceRelationId || relationId == DbRoleSettingRelationId || - relationId == ReplicationOriginRelationId) + relationId == ReplicationOriginRelationId || + relationId == 
SubscriptionRelationId) return true; /* These are their indexes (see indexing.h) */ if (relationId == AuthIdRolnameIndexId || @@ -245,7 +247,9 @@ IsSharedRelation(Oid relationId) relationId == TablespaceNameIndexId || relationId == DbRoleSettingDatidRolidIndexId || relationId == ReplicationOriginIdentIndex || - relationId == ReplicationOriginNameIndex) + relationId == ReplicationOriginNameIndex || + relationId == SubscriptionObjectIndexId || + relationId == SubscriptionNameIndexId) return true; /* These are their toast tables and toast indexes (see toasting.h) */ if (relationId == PgShdescriptionToastTable || diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 359719e450..1c43af6eff 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -48,7 +48,10 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_policy.h" #include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" #include "catalog/pg_trigger.h" @@ -64,6 +67,7 @@ #include "commands/extension.h" #include "commands/policy.h" #include "commands/proclang.h" +#include "commands/publicationcmds.h" #include "commands/schemacmds.h" #include "commands/seclabel.h" #include "commands/sequence.h" @@ -164,6 +168,9 @@ static const Oid object_classes[] = { ExtensionRelationId, /* OCLASS_EXTENSION */ EventTriggerRelationId, /* OCLASS_EVENT_TRIGGER */ PolicyRelationId, /* OCLASS_POLICY */ + PublicationRelationId, /* OCLASS_PUBLICATION */ + PublicationRelRelationId, /* OCLASS_PUBLICATION_REL */ + SubscriptionRelationId, /* OCLASS_SUBSCRIPTION */ TransformRelationId /* OCLASS_TRANSFORM */ }; @@ -1244,6 +1251,14 @@ doDeletion(const ObjectAddress *object, int flags) RemovePolicyById(object->objectId); break; + case OCLASS_PUBLICATION: + RemovePublicationById(object->objectId); 
+ break; + + case OCLASS_PUBLICATION_REL: + RemovePublicationRelById(object->objectId); + break; + case OCLASS_TRANSFORM: DropTransformById(object->objectId); break; @@ -2404,6 +2419,15 @@ getObjectClass(const ObjectAddress *object) case PolicyRelationId: return OCLASS_POLICY; + case PublicationRelationId: + return OCLASS_PUBLICATION; + + case PublicationRelRelationId: + return OCLASS_PUBLICATION_REL; + + case SubscriptionRelationId: + return OCLASS_SUBSCRIPTION; + case TransformRelationId: return OCLASS_TRANSFORM; } diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 2b1808b0f9..44d14ae2b1 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -45,7 +45,10 @@ #include "catalog/pg_operator.h" #include "catalog/pg_proc.h" #include "catalog/pg_policy.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" #include "catalog/pg_trigger.h" @@ -450,6 +453,30 @@ static const ObjectPropertyType ObjectProperty[] = Anum_pg_type_typacl, ACL_KIND_TYPE, true + }, + { + PublicationRelationId, + PublicationObjectIndexId, + PUBLICATIONOID, + PUBLICATIONNAME, + Anum_pg_publication_pubname, + InvalidAttrNumber, + Anum_pg_publication_pubowner, + InvalidAttrNumber, + -1, + true + }, + { + SubscriptionRelationId, + SubscriptionObjectIndexId, + SUBSCRIPTIONOID, + SUBSCRIPTIONNAME, + Anum_pg_subscription_subname, + InvalidAttrNumber, + Anum_pg_subscription_subowner, + InvalidAttrNumber, + -1, + true } }; @@ -653,6 +680,18 @@ static const struct object_type_map { "policy", OBJECT_POLICY }, + /* OCLASS_PUBLICATION */ + { + "publication", OBJECT_PUBLICATION + }, + /* OCLASS_PUBLICATION_REL */ + { + "publication relation", OBJECT_PUBLICATION_REL + }, + /* OCLASS_SUBSCRIPTION */ + { + "subscription", OBJECT_SUBSCRIPTION + }, /* OCLASS_TRANSFORM */ { 
"transform", OBJECT_TRANSFORM @@ -688,6 +727,9 @@ static ObjectAddress get_object_address_opf_member(ObjectType objtype, static ObjectAddress get_object_address_usermapping(List *objname, List *objargs, bool missing_ok); +static ObjectAddress get_object_address_publication_rel(List *objname, + List *objargs, Relation *relation, + bool missing_ok); static ObjectAddress get_object_address_defacl(List *objname, List *objargs, bool missing_ok); static const ObjectPropertyType *get_object_property_data(Oid class_id); @@ -812,6 +854,8 @@ get_object_address(ObjectType objtype, List *objname, List *objargs, case OBJECT_FOREIGN_SERVER: case OBJECT_EVENT_TRIGGER: case OBJECT_ACCESS_METHOD: + case OBJECT_PUBLICATION: + case OBJECT_SUBSCRIPTION: address = get_object_address_unqualified(objtype, objname, missing_ok); break; @@ -926,6 +970,11 @@ get_object_address(ObjectType objtype, List *objname, List *objargs, address = get_object_address_usermapping(objname, objargs, missing_ok); break; + case OBJECT_PUBLICATION_REL: + address = get_object_address_publication_rel(objname, objargs, + &relation, + missing_ok); + break; case OBJECT_DEFACL: address = get_object_address_defacl(objname, objargs, missing_ok); @@ -1091,6 +1139,12 @@ get_object_address_unqualified(ObjectType objtype, case OBJECT_EVENT_TRIGGER: msg = gettext_noop("event trigger name cannot be qualified"); break; + case OBJECT_PUBLICATION: + msg = gettext_noop("publication name cannot be qualified"); + break; + case OBJECT_SUBSCRIPTION: + msg = gettext_noop("subscription name cannot be qualified"); + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); msg = NULL; /* placate compiler */ @@ -1156,6 +1210,16 @@ get_object_address_unqualified(ObjectType objtype, address.objectId = get_event_trigger_oid(name, missing_ok); address.objectSubId = 0; break; + case OBJECT_PUBLICATION: + address.classId = PublicationRelationId; + address.objectId = get_publication_oid(name, missing_ok); + address.objectSubId = 0; + 
break; + case OBJECT_SUBSCRIPTION: + address.classId = SubscriptionRelationId; + address.objectId = get_subscription_oid(name, missing_ok); + address.objectSubId = 0; + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); /* placate compiler, which doesn't know elog won't return */ @@ -1743,6 +1807,51 @@ get_object_address_usermapping(List *objname, List *objargs, bool missing_ok) return address; } +/* + * Find the ObjectAddress for a publication relation. The objname parameter + * is the relation name; objargs contains the publication name. + */ +static ObjectAddress +get_object_address_publication_rel(List *objname, List *objargs, + Relation *relation, bool missing_ok) +{ + ObjectAddress address; + char *pubname; + Publication *pub; + + ObjectAddressSet(address, PublicationRelRelationId, InvalidOid); + + *relation = relation_openrv_extended(makeRangeVarFromNameList(objname), + AccessShareLock, missing_ok); + if (!*relation) + return address; + + /* fetch publication name from input list */ + pubname = strVal(linitial(objargs)); + + /* Now look up the pg_publication tuple */ + pub = GetPublicationByName(pubname, missing_ok); + if (!pub) + return address; + + /* Find the publication relation mapping in syscache. */ + address.objectId = + GetSysCacheOid2(PUBLICATIONRELMAP, + ObjectIdGetDatum(RelationGetRelid(*relation)), + ObjectIdGetDatum(pub->oid)); + if (!OidIsValid(address.objectId)) + { + if (!missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication relation \"%s\" in publication \"%s\" does not exist", + RelationGetRelationName(*relation), pubname))); + return address; + } + + return address; +} + /* * Find the ObjectAddress for a default ACL. 
*/ @@ -2002,6 +2111,7 @@ pg_get_object_address(PG_FUNCTION_ARGS) case OBJECT_DOMCONSTRAINT: case OBJECT_CAST: case OBJECT_USER_MAPPING: + case OBJECT_PUBLICATION_REL: case OBJECT_DEFACL: case OBJECT_TRANSFORM: if (list_length(args) != 1) @@ -2183,6 +2293,16 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address, format_type_be(targettypeid)))); } break; + case OBJECT_PUBLICATION: + if (!pg_publication_ownercheck(address.objectId, roleid)) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION, + NameListToString(objname)); + break; + case OBJECT_SUBSCRIPTION: + if (!pg_subscription_ownercheck(address.objectId, roleid)) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION, + NameListToString(objname)); + break; case OBJECT_TRANSFORM: { TypeName *typename = (TypeName *) linitial(objname); @@ -3191,6 +3311,41 @@ getObjectDescription(const ObjectAddress *object) break; } + case OCLASS_PUBLICATION: + { + appendStringInfo(&buffer, _("publication %s"), + get_publication_name(object->objectId)); + break; + } + + case OCLASS_PUBLICATION_REL: + { + HeapTuple tup; + char *pubname; + Form_pg_publication_rel prform; + + tup = SearchSysCache1(PUBLICATIONREL, + ObjectIdGetDatum(object->objectId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication table %u", + object->objectId); + + prform = (Form_pg_publication_rel) GETSTRUCT(tup); + pubname = get_publication_name(prform->prpubid); + + appendStringInfo(&buffer, _("publication table %s in publication %s"), + get_rel_name(prform->prrelid), pubname); + ReleaseSysCache(tup); + break; + } + + case OCLASS_SUBSCRIPTION: + { + appendStringInfo(&buffer, _("subscription %s"), + get_subscription_name(object->objectId)); + break; + } + default: appendStringInfo(&buffer, "unrecognized object %u %u %d", object->classId, @@ -3677,6 +3832,18 @@ getObjectTypeDescription(const ObjectAddress *object) appendStringInfoString(&buffer, "access method"); break; + case OCLASS_PUBLICATION: + 
appendStringInfoString(&buffer, "publication"); + break; + + case OCLASS_PUBLICATION_REL: + appendStringInfoString(&buffer, "publication table"); + break; + + case OCLASS_SUBSCRIPTION: + appendStringInfoString(&buffer, "subscription"); + break; + default: appendStringInfo(&buffer, "unrecognized %u", object->classId); break; @@ -4648,6 +4815,58 @@ getObjectIdentityParts(const ObjectAddress *object, } break; + case OCLASS_PUBLICATION: + { + char *pubname; + + pubname = get_publication_name(object->objectId); + appendStringInfoString(&buffer, + quote_identifier(pubname)); + if (objname) + *objname = list_make1(pubname); + break; + } + + case OCLASS_PUBLICATION_REL: + { + HeapTuple tup; + char *pubname; + Form_pg_publication_rel prform; + + tup = SearchSysCache1(PUBLICATIONREL, + ObjectIdGetDatum(object->objectId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication table %u", + object->objectId); + + prform = (Form_pg_publication_rel) GETSTRUCT(tup); + pubname = get_publication_name(prform->prpubid); + + appendStringInfo(&buffer, _("publication table %s in publication %s"), + get_rel_name(prform->prrelid), pubname); + + if (objname) + { + getRelationIdentity(&buffer, prform->prrelid, objname); + *objargs = list_make1(pubname); + } + + ReleaseSysCache(tup); + break; + } + + case OCLASS_SUBSCRIPTION: + { + char *subname; + + subname = get_subscription_name(object->objectId); + appendStringInfoString(&buffer, + quote_identifier(subname)); + if (objname) + *objname = list_make1(subname); + break; + } + default: appendStringInfo(&buffer, "unrecognized object %u %u %d", object->classId, diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c new file mode 100644 index 0000000000..576b7faa04 --- /dev/null +++ b/src/backend/catalog/pg_publication.c @@ -0,0 +1,457 @@ +/*------------------------------------------------------------------------- + * + * pg_publication.c + * publication C API manipulation + * + * 
Copyright (c) 2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * pg_publication.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/hash.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" + +#include "catalog/catalog.h" +#include "catalog/dependency.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_type.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" + +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +/* + * Check if relation can be in given publication and throws appropriate + * error if not. + */ +static void +check_publication_add_relation(Relation targetrel) +{ + /* Must be table */ + if (RelationGetForm(targetrel)->relkind != RELKIND_RELATION) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" is not a table", + RelationGetRelationName(targetrel)), + errdetail("Only tables can be added to publications."))); + + /* Can't be system table */ + if (IsCatalogRelation(targetrel)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" is a system table", + RelationGetRelationName(targetrel)), + errdetail("System tables cannot be added to publications."))); + + /* UNLOGGED and TEMP relations cannot be part of publication. 
*/ + if (!RelationNeedsWAL(targetrel)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("table \"%s\" cannot be replicated", + RelationGetRelationName(targetrel)), + errdetail("Temporary and unlogged relations cannot be replicated."))); +} + +/* + * Returns if relation represented by oid and Form_pg_class entry + * is publishable. + * + * Does same checks as the above, but does not need relation to be opened + * and also does not throw errors. + */ +static bool +is_publishable_class(Oid relid, Form_pg_class reltuple) +{ + return reltuple->relkind == RELKIND_RELATION && + !IsCatalogClass(relid, reltuple) && + reltuple->relpersistence == RELPERSISTENCE_PERMANENT && + /* + * Also exclude any tables created as part of initdb. This mainly + * affects the preinstalled information_schema. + * Note that IsCatalogClass() only checks for these inside pg_catalog + * and toast schemas. + */ + relid >= FirstNormalObjectId; +} + +/* + * Insert new publication / relation mapping. + */ +ObjectAddress +publication_add_relation(Oid pubid, Relation targetrel, + bool if_not_exists) +{ + Relation rel; + HeapTuple tup; + Datum values[Natts_pg_publication_rel]; + bool nulls[Natts_pg_publication_rel]; + Oid relid = RelationGetRelid(targetrel); + Oid prrelid; + Publication *pub = GetPublication(pubid); + ObjectAddress myself, + referenced; + + rel = heap_open(PublicationRelRelationId, RowExclusiveLock); + + /* + * Check for duplicates. Note that this does not really prevent + * duplicates, it's here just to provide nicer error message in common + * case. The real protection is the unique key on the catalog. 
+ */ + if (SearchSysCacheExists2(PUBLICATIONRELMAP, ObjectIdGetDatum(relid), + ObjectIdGetDatum(pubid))) + { + heap_close(rel, RowExclusiveLock); + + if (if_not_exists) + return InvalidObjectAddress; + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("relation \"%s\" is already member of publication \"%s\"", + RelationGetRelationName(targetrel), pub->name))); + } + + check_publication_add_relation(targetrel); + + /* Form a tuple. */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_publication_rel_prpubid - 1] = + ObjectIdGetDatum(pubid); + values[Anum_pg_publication_rel_prrelid - 1] = + ObjectIdGetDatum(relid); + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + /* Insert tuple into catalog. */ + prrelid = simple_heap_insert(rel, tup); + CatalogUpdateIndexes(rel, tup); + heap_freetuple(tup); + + ObjectAddressSet(myself, PublicationRelRelationId, prrelid); + + /* Add dependency on the publication */ + ObjectAddressSet(referenced, PublicationRelationId, pubid); + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* Add dependency on the relation */ + ObjectAddressSet(referenced, RelationRelationId, relid); + recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO); + + /* Close the table. */ + heap_close(rel, RowExclusiveLock); + + /* Invalidate relcache so that publication info is rebuilt. */ + CacheInvalidateRelcache(targetrel); + + return myself; +} + + +/* + * Gets list of publication oids for a relation oid. + */ +List * +GetRelationPublications(Oid relid) +{ + List *result = NIL; + CatCList *pubrellist; + int i; + + /* Find all publications associated with the relation. 
*/
+    pubrellist = SearchSysCacheList1(PUBLICATIONRELMAP,
+                                     ObjectIdGetDatum(relid));
+    for (i = 0; i < pubrellist->n_members; i++)
+    {
+        HeapTuple   tup = &pubrellist->members[i]->tuple;
+        Oid         pubid = ((Form_pg_publication_rel) GETSTRUCT(tup))->prpubid;
+
+        result = lappend_oid(result, pubid);
+    }
+
+    ReleaseSysCacheList(pubrellist);
+
+    return result;
+}
+
+/*
+ * Gets list of relation oids for a publication.
+ *
+ * Scans the publication/relation mapping catalog for rows whose prpubid
+ * equals pubid and collects their prrelid values.  Returns NIL when the
+ * publication has no mapped relations.
+ *
+ * This should only be used for normal publications; FOR ALL TABLES
+ * publications should use GetAllTablesPublicationRelations() instead.
+ */
+List *
+GetPublicationRelations(Oid pubid)
+{
+    List       *result;
+    Relation    pubrelsrel;
+    ScanKeyData scankey;
+    SysScanDesc scan;
+    HeapTuple   tup;
+
+    /* Find all relations associated with the publication. */
+    pubrelsrel = heap_open(PublicationRelRelationId, AccessShareLock);
+
+    ScanKeyInit(&scankey,
+                Anum_pg_publication_rel_prpubid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(pubid));
+
+    scan = systable_beginscan(pubrelsrel, PublicationRelMapIndexId, true,
+                              NULL, 1, &scankey);
+
+    result = NIL;
+    while (HeapTupleIsValid(tup = systable_getnext(scan)))
+    {
+        Form_pg_publication_rel pubrel;
+
+        pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
+
+        result = lappend_oid(result, pubrel->prrelid);
+    }
+
+    systable_endscan(scan);
+    heap_close(pubrelsrel, AccessShareLock);
+
+    return result;
+}
+
+/*
+ * Gets list of publication oids for publications marked as FOR ALL TABLES.
+ */
+List *
+GetAllTablesPublications(void)
+{
+    List       *result;
+    Relation    rel;
+    ScanKeyData scankey;
+    SysScanDesc scan;
+    HeapTuple   tup;
+
+    /* Find all publications that are marked as for all tables. 
*/
+    rel = heap_open(PublicationRelationId, AccessShareLock);
+
+    ScanKeyInit(&scankey,
+                Anum_pg_publication_puballtables,
+                BTEqualStrategyNumber, F_BOOLEQ,
+                BoolGetDatum(true));
+
+    scan = systable_beginscan(rel, InvalidOid, false,
+                              NULL, 1, &scankey);
+
+    result = NIL;
+    while (HeapTupleIsValid(tup = systable_getnext(scan)))
+        result = lappend_oid(result, HeapTupleGetOid(tup));
+
+    systable_endscan(scan);
+    heap_close(rel, AccessShareLock);
+
+    return result;
+}
+
+/*
+ * Gets list of all relations published by FOR ALL TABLES publication(s).
+ *
+ * The function takes no publication argument: it scans pg_class for rows
+ * with relkind RELKIND_RELATION and keeps every one that
+ * is_publishable_class() accepts.  Returns NIL if nothing qualifies.
+ */
+List *
+GetAllTablesPublicationRelations(void)
+{
+    Relation    classRel;
+    ScanKeyData key[1];
+    HeapScanDesc scan;
+    HeapTuple   tuple;
+    List       *result = NIL;
+
+    classRel = heap_open(RelationRelationId, AccessShareLock);
+
+    /* Restrict the scan to ordinary tables. */
+    ScanKeyInit(&key[0],
+                Anum_pg_class_relkind,
+                BTEqualStrategyNumber, F_CHAREQ,
+                CharGetDatum(RELKIND_RELATION));
+
+    scan = heap_beginscan_catalog(classRel, 1, key);
+
+    while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+    {
+        Oid         relid = HeapTupleGetOid(tuple);
+        Form_pg_class relForm = (Form_pg_class) GETSTRUCT(tuple);
+
+        /* Apply the remaining publishability rules to each candidate. */
+        if (is_publishable_class(relid, relForm))
+            result = lappend_oid(result, relid);
+    }
+
+    heap_endscan(scan);
+    heap_close(classRel, AccessShareLock);
+
+    return result;
+}
+
+/*
+ * Get publication using oid
+ *
+ * The Publication struct and its data are palloced here. 
+ */
+Publication *
+GetPublication(Oid pubid)
+{
+    HeapTuple   cachetup;
+    Form_pg_publication form;
+    Publication *result;
+
+    cachetup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid));
+    if (!HeapTupleIsValid(cachetup))
+        elog(ERROR, "cache lookup failed for publication %u", pubid);
+
+    form = (Form_pg_publication) GETSTRUCT(cachetup);
+
+    /* Copy everything we need out of the cache entry before releasing it. */
+    result = (Publication *) palloc(sizeof(Publication));
+    result->oid = pubid;
+    result->alltables = form->puballtables;
+    result->pubactions.pubinsert = form->pubinsert;
+    result->pubactions.pubupdate = form->pubupdate;
+    result->pubactions.pubdelete = form->pubdelete;
+    result->name = pstrdup(NameStr(form->pubname));
+
+    ReleaseSysCache(cachetup);
+
+    return result;
+}
+
+
+/*
+ * Get Publication using name.
+ *
+ * Returns the result of GetPublication() for the matching OID.  If no
+ * publication has that name, returns NULL when missing_ok is true and
+ * raises ERRCODE_UNDEFINED_OBJECT otherwise.
+ */
+Publication *
+GetPublicationByName(const char *pubname, bool missing_ok)
+{
+    Oid         pubid = GetSysCacheOid1(PUBLICATIONNAME,
+                                        CStringGetDatum(pubname));
+
+    /* Found: build the struct from the catalog row. */
+    if (OidIsValid(pubid))
+        return GetPublication(pubid);
+
+    /* Not found: complain unless the caller tolerates absence. */
+    if (!missing_ok)
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("publication \"%s\" does not exist", pubname)));
+
+    return NULL;
+}
+
+/*
+ * get_publication_oid - given a publication name, look up the OID
+ *
+ * If missing_ok is false, throw an error if name not found. If true, just
+ * return InvalidOid. 
+ */ +Oid +get_publication_oid(const char *pubname, bool missing_ok) +{ + Oid oid; + + oid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(pubname)); + if (!OidIsValid(oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", pubname))); + return oid; +} + +/* + * get_publication_name - given a publication Oid, look up the name + */ +char * +get_publication_name(Oid pubid) +{ + HeapTuple tup; + char *pubname; + Form_pg_publication pubform; + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + pubname = pstrdup(NameStr(pubform->pubname)); + + ReleaseSysCache(tup); + + return pubname; +} + +/* + * Returns Oids of tables in a publication. + */ +Datum +pg_get_publication_tables(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + char *pubname = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Publication *publication; + List *tables; + ListCell **lcp; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + publication = GetPublicationByName(pubname, false); + if (publication->alltables) + tables = GetAllTablesPublicationRelations(); + else + tables = GetPublicationRelations(publication->oid); + lcp = (ListCell **) palloc(sizeof(ListCell *)); + *lcp = list_head(tables); + funcctx->user_fctx = (void *) lcp; + + MemoryContextSwitchTo(oldcontext); + } + + /* stuff done on every call of the function */ + funcctx = SRF_PERCALL_SETUP(); + lcp = (ListCell **) funcctx->user_fctx; + + while (*lcp != NULL) + { + Oid relid = lfirst_oid(*lcp); + + *lcp = 
lnext(*lcp); + SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(relid)); + } + + SRF_RETURN_DONE(funcctx); +} diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index fb39a01841..60ed957655 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -39,6 +39,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_shdepend.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" @@ -53,7 +54,9 @@ #include "commands/extension.h" #include "commands/policy.h" #include "commands/proclang.h" +#include "commands/publicationcmds.h" #include "commands/schemacmds.h" +#include "commands/subscriptioncmds.h" #include "commands/tablecmds.h" #include "commands/typecmds.h" #include "storage/lmgr.h" @@ -1406,6 +1409,14 @@ shdepReassignOwned(List *roleids, Oid newrole) AlterEventTriggerOwner_oid(sdepForm->objid, newrole); break; + case PublicationRelationId: + AlterPublicationOwner_oid(sdepForm->objid, newrole); + break; + + case SubscriptionRelationId: + AlterSubscriptionOwner_oid(sdepForm->objid, newrole); + break; + /* Generic alter owner cases */ case CollationRelationId: case ConversionRelationId: diff --git a/src/backend/catalog/pg_subscription.c b/src/backend/catalog/pg_subscription.c new file mode 100644 index 0000000000..c358ef6c9a --- /dev/null +++ b/src/backend/catalog/pg_subscription.c @@ -0,0 +1,207 @@ +/*------------------------------------------------------------------------- + * + * pg_subscription.c + * replication subscriptions + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/catalog/pg_subscription.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" + +#include 
"catalog/pg_type.h"
+#include "catalog/pg_subscription.h"
+
+#include "nodes/makefuncs.h"
+
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/syscache.h"
+
+
+static List *textarray_to_stringlist(ArrayType *textarray);
+
+/*
+ * Fetch the subscription from the syscache.
+ *
+ * Returns a palloc'd Subscription whose name, conninfo, slotname and
+ * publications members are separately allocated copies, so the result stays
+ * valid after the cache entry is released (see FreeSubscription()).
+ *
+ * If no subscription with the given OID exists, returns NULL when
+ * missing_ok is true and raises an error otherwise.
+ */
+Subscription *
+GetSubscription(Oid subid, bool missing_ok)
+{
+    HeapTuple   tup;
+    Subscription *sub;
+    Form_pg_subscription subform;
+    Datum       datum;
+    bool        isnull;
+
+    tup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+
+    if (!HeapTupleIsValid(tup))
+    {
+        if (missing_ok)
+            return NULL;
+
+        elog(ERROR, "cache lookup failed for subscription %u", subid);
+    }
+
+    subform = (Form_pg_subscription) GETSTRUCT(tup);
+
+    sub = (Subscription *) palloc(sizeof(Subscription));
+    sub->oid = subid;
+    sub->dbid = subform->subdbid;
+    sub->name = pstrdup(NameStr(subform->subname));
+    sub->owner = subform->subowner;
+    sub->enabled = subform->subenabled;
+
+    /*
+     * Get conninfo.  TextDatumGetCString() already returns a freshly
+     * palloc'd copy, so store it directly; an extra pstrdup() would only
+     * leak the intermediate string.
+     */
+    datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                            tup,
+                            Anum_pg_subscription_subconninfo,
+                            &isnull);
+    Assert(!isnull);
+    sub->conninfo = TextDatumGetCString(datum);
+
+    /* Get slotname; NameStr() points into the tuple, so this one is copied. */
+    datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                            tup,
+                            Anum_pg_subscription_subslotname,
+                            &isnull);
+    Assert(!isnull);
+    sub->slotname = pstrdup(NameStr(*DatumGetName(datum)));
+
+    /* Get publications */
+    datum = SysCacheGetAttr(SUBSCRIPTIONOID,
+                            tup,
+                            Anum_pg_subscription_subpublications,
+                            &isnull);
+    Assert(!isnull);
+    sub->publications = textarray_to_stringlist(DatumGetArrayTypeP(datum));
+
+    ReleaseSysCache(tup);
+
+    return sub;
+}
+
+/*
+ * Return number of subscriptions defined in given database.
+ * Used by dropdb() to check if database can indeed be dropped. 
+ */
+int
+CountDBSubscriptions(Oid dbid)
+{
+    Relation    subrel;
+    SysScanDesc sscan;
+    ScanKeyData key;
+    int         count;
+
+    subrel = heap_open(SubscriptionRelationId, RowExclusiveLock);
+
+    /* Match only the rows whose subdbid is the given database. */
+    ScanKeyInit(&key,
+                Anum_pg_subscription_subdbid,
+                BTEqualStrategyNumber, F_OIDEQ,
+                ObjectIdGetDatum(dbid));
+
+    sscan = systable_beginscan(subrel, InvalidOid, false,
+                               NULL, 1, &key);
+
+    for (count = 0; HeapTupleIsValid(systable_getnext(sscan)); count++)
+        ;                       /* nothing to do but count */
+
+    systable_endscan(sscan);
+
+    /* Close the relation without releasing the lock taken above. */
+    heap_close(subrel, NoLock);
+
+    return count;
+}
+
+/*
+ * Release a Subscription struct together with the members it owns.
+ */
+void
+FreeSubscription(Subscription *sub)
+{
+    list_free_deep(sub->publications);
+    pfree(sub->slotname);
+    pfree(sub->conninfo);
+    pfree(sub->name);
+    pfree(sub);
+}
+
+/*
+ * get_subscription_oid - look up a subscription's OID by name
+ *
+ * The lookup is keyed on MyDatabaseId plus the name.  When nothing matches,
+ * an error is raised unless missing_ok is true, in which case InvalidOid is
+ * returned.
+ */
+Oid
+get_subscription_oid(const char *subname, bool missing_ok)
+{
+    Oid         subid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId,
+                                        CStringGetDatum(subname));
+
+    if (!missing_ok && !OidIsValid(subid))
+        ereport(ERROR,
+                (errcode(ERRCODE_UNDEFINED_OBJECT),
+                 errmsg("subscription \"%s\" does not exist", subname)));
+
+    return subid;
+}
+
+/*
+ * get_subscription_name - look up a subscription's name by OID
+ *
+ * Returns a palloc'd copy of the name; errors out if the OID is unknown.
+ */
+char *
+get_subscription_name(Oid subid)
+{
+    HeapTuple   cachetup;
+    Form_pg_subscription form;
+    char       *result;
+
+    cachetup = SearchSysCache1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid));
+    if (!HeapTupleIsValid(cachetup))
+        elog(ERROR, "cache lookup failed for subscription %u", subid);
+
+    /* Copy the name out before the cache entry is released. */
+    form = (Form_pg_subscription) GETSTRUCT(cachetup);
+    result = pstrdup(NameStr(form->subname));
+
+    ReleaseSysCache(cachetup);
+
+    return result;
+}
+
+/*
+ * Convert text array to list of strings.
+ *
+ * Note: the resulting list of strings is pallocated here. 
+ */
+static List *
+textarray_to_stringlist(ArrayType *textarray)
+{
+    Datum      *elems;
+    int         nelems, i;
+    List       *res = NIL;
+
+    deconstruct_array(textarray,
+                      TEXTOID, -1, false, 'i',
+                      &elems, NULL, &nelems);
+
+    /*
+     * TextDatumGetCString() already returns a freshly palloc'd,
+     * NUL-terminated copy of each element, so hand it to makeString()
+     * directly; wrapping it in pstrdup() would only leak the intermediate
+     * copy.  An empty array skips the loop and yields NIL.
+     */
+    for (i = 0; i < nelems; i++)
+        res = lappend(res, makeString(TextDatumGetCString(elems[i])));
+
+    return res;
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 07f291b7cd..4dfedf89b6 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -248,6 +248,15 @@ CREATE VIEW pg_stats WITH (security_barrier) AS REVOKE ALL on pg_statistic FROM public; +CREATE VIEW pg_publication_tables AS + SELECT + P.pubname AS pubname, + N.nspname AS schemaname, + C.relname AS tablename + FROM pg_publication P, pg_class C + JOIN pg_namespace N ON (N.oid = C.relnamespace) + WHERE C.oid IN (SELECT relid FROM pg_get_publication_tables(P.pubname)); + CREATE VIEW pg_locks AS SELECT * FROM pg_lock_status() AS L; @@ -708,6 +717,20 @@ CREATE VIEW pg_stat_wal_receiver AS FROM pg_stat_get_wal_receiver() s WHERE s.pid IS NOT NULL; +CREATE VIEW pg_stat_subscription AS + SELECT + su.oid AS subid, + su.subname, + st.pid, + st.received_lsn, + st.last_msg_send_time, + st.last_msg_receipt_time, + st.latest_end_lsn, + st.latest_end_time + FROM pg_subscription su + LEFT JOIN pg_stat_get_subscription(NULL) st + ON (st.subid = su.oid); + CREATE VIEW pg_stat_ssl AS SELECT S.pid, @@ -866,6 +889,8 @@ CREATE VIEW pg_replication_origin_status AS REVOKE ALL ON pg_replication_origin_status FROM public; +REVOKE ALL ON pg_subscription FROM public; + -- -- We have a few function definitions in here, too. 
-- At some point there might be enough to justify breaking them out into diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index 6b3742c0a0..e0fab38cbe 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -17,9 +17,9 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \ dbcommands.o define.o discard.o dropcmds.o \ event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \ indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \ - policy.o portalcmds.o prepare.o proclang.o \ - schemacmds.o seclabel.o sequence.o tablecmds.o tablespace.o trigger.o \ - tsearchcmds.o typecmds.o user.o vacuum.o vacuumlazy.o \ - variable.o view.o + policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \ + schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \ + tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \ + vacuumlazy.o variable.o view.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index 8b6f420909..768fcc82dd 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -45,7 +45,9 @@ #include "commands/extension.h" #include "commands/policy.h" #include "commands/proclang.h" +#include "commands/publicationcmds.h" #include "commands/schemacmds.h" +#include "commands/subscriptioncmds.h" #include "commands/tablecmds.h" #include "commands/tablespace.h" #include "commands/trigger.h" @@ -770,6 +772,14 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt) return AlterEventTriggerOwner(strVal(linitial(stmt->object)), newowner); + case OBJECT_PUBLICATION: + return AlterPublicationOwner(strVal(linitial(stmt->object)), + newowner); + + case OBJECT_SUBSCRIPTION: + return AlterSubscriptionOwner(strVal(linitial(stmt->object)), + newowner); + /* Generic cases */ case OBJECT_AGGREGATE: case OBJECT_COLLATION: diff --git a/src/backend/commands/dbcommands.c 
b/src/backend/commands/dbcommands.c index 2833f3e846..6ad8fd77b1 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -37,6 +37,7 @@ #include "catalog/pg_authid.h" #include "catalog/pg_database.h" #include "catalog/pg_db_role_setting.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "commands/comment.h" #include "commands/dbcommands.h" @@ -790,6 +791,7 @@ dropdb(const char *dbname, bool missing_ok) int npreparedxacts; int nslots, nslots_active; + int nsubscriptions; /* * Look up the target database's OID, and get exclusive lock on it. We @@ -874,6 +876,21 @@ dropdb(const char *dbname, bool missing_ok) dbname), errdetail_busy_db(notherbackends, npreparedxacts))); + /* + * Check if there are subscriptions defined in the target database. + * + * We can't drop them automatically because they might be holding + * resources in other databases/instances. + */ + if ((nsubscriptions = CountDBSubscriptions(db_id)) > 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_IN_USE), + errmsg("database \"%s\" is being used by logical replication subscription", + dbname), + errdetail_plural("There is %d subscription.", + "There are %d subscriptions.", + nsubscriptions, nsubscriptions))); + /* * Remove the database's tuple from pg_database. */ diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c index 714b5252c7..8da924517b 100644 --- a/src/backend/commands/define.c +++ b/src/backend/commands/define.c @@ -319,3 +319,31 @@ defGetTypeLength(DefElem *def) def->defname, defGetString(def)))); return 0; /* keep compiler quiet */ } + +/* + * Extract a list of string values (otherwise uninterpreted) from a DefElem. 
+ */ +List * +defGetStringList(DefElem *def) +{ + ListCell *cell; + + if (def->arg == NULL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a parameter", + def->defname))); + if (nodeTag(def->arg) != T_List) + elog(ERROR, "unrecognized node type: %d", (int) nodeTag(def->arg)); + + foreach(cell, (List *)def->arg) + { + Node *str = (Node *) lfirst(cell); + + if (!IsA(str, String)) + elog(ERROR, "unexpected node type in name list: %d", + (int) nodeTag(str)); + } + + return (List *) def->arg; +} diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c index 96436c0689..8cfbcf43f7 100644 --- a/src/backend/commands/dropcmds.c +++ b/src/backend/commands/dropcmds.c @@ -441,6 +441,10 @@ does_not_exist_skipping(ObjectType objtype, List *objname, List *objargs) } } break; + case OBJECT_PUBLICATION: + msg = gettext_noop("publication \"%s\" does not exist, skipping"); + name = NameListToString(objname); + break; default: elog(ERROR, "unrecognized object type: %d", (int) objtype); break; diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index c0061e195e..8125537361 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -106,11 +106,13 @@ static event_trigger_support_data event_trigger_support[] = { {"OPERATOR CLASS", true}, {"OPERATOR FAMILY", true}, {"POLICY", true}, + {"PUBLICATION", true}, {"ROLE", false}, {"RULE", true}, {"SCHEMA", true}, {"SEQUENCE", true}, {"SERVER", true}, + {"SUBSCRIPTION", true}, {"TABLE", true}, {"TABLESPACE", false}, {"TRANSFORM", true}, @@ -1103,9 +1105,12 @@ EventTriggerSupportsObjectType(ObjectType obtype) case OBJECT_OPERATOR: case OBJECT_OPFAMILY: case OBJECT_POLICY: + case OBJECT_PUBLICATION: + case OBJECT_PUBLICATION_REL: case OBJECT_RULE: case OBJECT_SCHEMA: case OBJECT_SEQUENCE: + case OBJECT_SUBSCRIPTION: case OBJECT_TABCONSTRAINT: case OBJECT_TABLE: case OBJECT_TRANSFORM: @@ -1168,6 +1173,9 @@ 
EventTriggerSupportsObjectClass(ObjectClass objclass) case OCLASS_EXTENSION: case OCLASS_POLICY: case OCLASS_AM: + case OCLASS_PUBLICATION: + case OCLASS_PUBLICATION_REL: + case OCLASS_SUBSCRIPTION: return true; } diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c new file mode 100644 index 0000000000..21e523deb0 --- /dev/null +++ b/src/backend/commands/publicationcmds.c @@ -0,0 +1,754 @@ +/*------------------------------------------------------------------------- + * + * publicationcmds.c + * publication manipulation + * + * Copyright (c) 2016, PostgreSQL Global Development Group + * + * IDENTIFICATION + * publicationcmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/hash.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/xact.h" + +#include "catalog/catalog.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_inherits_fn.h" +#include "catalog/pg_type.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" + +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/publicationcmds.h" + +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +/* Same as MAXNUMMESSAGES in sinvaladt.c */ +#define MAX_RELCACHE_INVAL_MSGS 4096 + +static List *OpenTableList(List *tables); +static void CloseTableList(List *rels); +static void PublicationAddTables(Oid pubid, List *rels, bool if_not_exists, + AlterPublicationStmt *stmt); +static void PublicationDropTables(Oid pubid, List *rels, bool 
missing_ok); + +static void +parse_publication_options(List *options, + bool *publish_insert_given, + bool *publish_insert, + bool *publish_update_given, + bool *publish_update, + bool *publish_delete_given, + bool *publish_delete) +{ + ListCell *lc; + + *publish_insert_given = false; + *publish_update_given = false; + *publish_delete_given = false; + + /* Defaults are true */ + *publish_insert = true; + *publish_update = true; + *publish_delete = true; + + /* Parse options */ + foreach (lc, options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "publish insert") == 0) + { + if (*publish_insert_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_insert_given = true; + *publish_insert = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "nopublish insert") == 0) + { + if (*publish_insert_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_insert_given = true; + *publish_insert = !defGetBoolean(defel); + } + else if (strcmp(defel->defname, "publish update") == 0) + { + if (*publish_update_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_update_given = true; + *publish_update = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "nopublish update") == 0) + { + if (*publish_update_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_update_given = true; + *publish_update = !defGetBoolean(defel); + } + else if (strcmp(defel->defname, "publish delete") == 0) + { + if (*publish_delete_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_delete_given = true; + *publish_delete = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "nopublish delete") == 0) + { + if (*publish_delete_given) 
+ ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publish_delete_given = true; + *publish_delete = !defGetBoolean(defel); + } + else + elog(ERROR, "unrecognized option: %s", defel->defname); + } +} + +/* + * Create new publication. + */ +ObjectAddress +CreatePublication(CreatePublicationStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + Oid puboid; + bool nulls[Natts_pg_publication]; + Datum values[Natts_pg_publication]; + HeapTuple tup; + bool publish_insert_given; + bool publish_update_given; + bool publish_delete_given; + bool publish_insert; + bool publish_update; + bool publish_delete; + AclResult aclresult; + + /* must have CREATE privilege on database */ + aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, ACL_KIND_DATABASE, + get_database_name(MyDatabaseId)); + + /* FOR ALL TABLES requires superuser */ + if (stmt->for_all_tables && !superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to create FOR ALL TABLES publication")))); + + rel = heap_open(PublicationRelationId, RowExclusiveLock); + + /* Check if name is used */ + puboid = GetSysCacheOid1(PUBLICATIONNAME, CStringGetDatum(stmt->pubname)); + if (OidIsValid(puboid)) + { + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("publication \"%s\" already exists", + stmt->pubname))); + } + + /* Form a tuple. 
*/ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_publication_pubname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->pubname)); + values[Anum_pg_publication_pubowner - 1] = ObjectIdGetDatum(GetUserId()); + + parse_publication_options(stmt->options, + &publish_insert_given, &publish_insert, + &publish_update_given, &publish_update, + &publish_delete_given, &publish_delete); + + values[Anum_pg_publication_puballtables - 1] = + BoolGetDatum(stmt->for_all_tables); + values[Anum_pg_publication_pubinsert - 1] = + BoolGetDatum(publish_insert); + values[Anum_pg_publication_pubupdate - 1] = + BoolGetDatum(publish_update); + values[Anum_pg_publication_pubdelete - 1] = + BoolGetDatum(publish_delete); + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + /* Insert tuple into catalog. */ + puboid = simple_heap_insert(rel, tup); + CatalogUpdateIndexes(rel, tup); + heap_freetuple(tup); + + ObjectAddressSet(myself, PublicationRelationId, puboid); + + /* Make the changes visible. */ + CommandCounterIncrement(); + + if (stmt->tables) + { + List *rels; + + Assert(list_length(stmt->tables) > 0); + + rels = OpenTableList(stmt->tables); + PublicationAddTables(puboid, rels, true, NULL); + CloseTableList(rels); + } + + heap_close(rel, RowExclusiveLock); + + InvokeObjectPostCreateHook(PublicationRelationId, puboid, 0); + + return myself; +} + +/* + * Change options of a publication. 
+ */ +static void +AlterPublicationOptions(AlterPublicationStmt *stmt, Relation rel, + HeapTuple tup) +{ + bool nulls[Natts_pg_publication]; + bool replaces[Natts_pg_publication]; + Datum values[Natts_pg_publication]; + bool publish_insert_given; + bool publish_update_given; + bool publish_delete_given; + bool publish_insert; + bool publish_update; + bool publish_delete; + ObjectAddress obj; + + parse_publication_options(stmt->options, + &publish_insert_given, &publish_insert, + &publish_update_given, &publish_update, + &publish_delete_given, &publish_delete); + + /* Everything ok, form a new tuple. */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + if (publish_insert_given) + { + values[Anum_pg_publication_pubinsert - 1] = + BoolGetDatum(publish_insert); + replaces[Anum_pg_publication_pubinsert - 1] = true; + } + if (publish_update_given) + { + values[Anum_pg_publication_pubupdate - 1] = + BoolGetDatum(publish_update); + replaces[Anum_pg_publication_pubupdate - 1] = true; + } + if (publish_delete_given) + { + values[Anum_pg_publication_pubdelete - 1] = + BoolGetDatum(publish_delete); + replaces[Anum_pg_publication_pubdelete - 1] = true; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); + + /* Update the catalog. */ + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + CommandCounterIncrement(); + + /* Invalidate the relcache. */ + if (((Form_pg_publication) GETSTRUCT(tup))->puballtables) + { + CacheInvalidateRelcacheAll(); + } + else + { + List *relids = GetPublicationRelations(HeapTupleGetOid(tup)); + + /* + * We don't want to send too many individual messages, at some point + * it's cheaper to just reset whole relcache. 
+ */ + if (list_length(relids) < MAX_RELCACHE_INVAL_MSGS) + { + ListCell *lc; + + foreach (lc, relids) + { + Oid relid = lfirst_oid(lc); + + CacheInvalidateRelcacheByRelid(relid); + } + } + else + CacheInvalidateRelcacheAll(); + } + + ObjectAddressSet(obj, PublicationRelationId, HeapTupleGetOid(tup)); + EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress, + (Node *) stmt); + + InvokeObjectPostAlterHook(PublicationRelationId, HeapTupleGetOid(tup), 0); +} + +/* + * Add or remove table to/from publication. + */ +static void +AlterPublicationTables(AlterPublicationStmt *stmt, Relation rel, + HeapTuple tup) +{ + Oid pubid = HeapTupleGetOid(tup); + List *rels = NIL; + Form_pg_publication pubform = (Form_pg_publication) GETSTRUCT(tup); + + /* Check that user is allowed to manipulate the publication tables. */ + if (pubform->puballtables) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("publication \"%s\" is defined as FOR ALL TABLES", + NameStr(pubform->pubname)), + errdetail("Tables cannot be added to or dropped from FOR ALL TABLES publications."))); + + Assert(list_length(stmt->tables) > 0); + + rels = OpenTableList(stmt->tables); + + if (stmt->tableAction == DEFELEM_ADD) + PublicationAddTables(pubid, rels, false, stmt); + else if (stmt->tableAction == DEFELEM_DROP) + PublicationDropTables(pubid, rels, false); + else /* DEFELEM_SET */ + { + List *oldrelids = GetPublicationRelations(pubid); + List *delrels = NIL; + ListCell *oldlc; + + /* Calculate which relations to drop. */ + foreach(oldlc, oldrelids) + { + Oid oldrelid = lfirst_oid(oldlc); + ListCell *newlc; + bool found = false; + + foreach(newlc, rels) + { + Relation newrel = (Relation) lfirst(newlc); + + if (RelationGetRelid(newrel) == oldrelid) + { + found = true; + break; + } + } + + if (!found) + { + Relation oldrel = heap_open(oldrelid, + ShareUpdateExclusiveLock); + delrels = lappend(delrels, oldrel); + } + } + + /* And drop them. 
*/ + PublicationDropTables(pubid, delrels, true); + + /* + * Don't bother calculating the difference for adding, we'll catch + * and skip existing ones when doing catalog update. + */ + PublicationAddTables(pubid, rels, true, stmt); + + CloseTableList(delrels); + } + + CloseTableList(rels); +} + +/* + * Alter the existing publication. + * + * This is dispatcher function for AlterPublicationOptions and + * AlterPublicationTables. + */ +void +AlterPublication(AlterPublicationStmt *stmt) +{ + Relation rel; + HeapTuple tup; + + rel = heap_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONNAME, + CStringGetDatum(stmt->pubname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", + stmt->pubname))); + + /* must be owner */ + if (!pg_publication_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION, + stmt->pubname); + + if (stmt->options) + AlterPublicationOptions(stmt, rel, tup); + else + AlterPublicationTables(stmt, rel, tup); + + /* Cleanup. */ + heap_freetuple(tup); + heap_close(rel, RowExclusiveLock); +} + +/* + * Drop publication by OID + */ +void +RemovePublicationById(Oid pubid) +{ + Relation rel; + HeapTuple tup; + + rel = heap_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + simple_heap_delete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + heap_close(rel, RowExclusiveLock); +} + +/* + * Remove relation from publication by mapping OID. 
+ */
+void
+RemovePublicationRelById(Oid proid)
+{
+    Relation    rel;
+    HeapTuple   tup;
+    Form_pg_publication_rel pubrel;
+
+    rel = heap_open(PublicationRelRelationId, RowExclusiveLock);
+
+    tup = SearchSysCache1(PUBLICATIONREL, ObjectIdGetDatum(proid));
+
+    if (!HeapTupleIsValid(tup))
+        elog(ERROR, "cache lookup failed for publication table %u",
+             proid);
+
+
+    pubrel = (Form_pg_publication_rel) GETSTRUCT(tup);
+
+    /* Invalidate relcache so that publication info is rebuilt. */
+    CacheInvalidateRelcacheByRelid(pubrel->prrelid);
+
+    simple_heap_delete(rel, &tup->t_self);
+
+    ReleaseSysCache(tup);
+
+    heap_close(rel, RowExclusiveLock);
+}
+
+/*
+ * Open the relations named by the provided RangeVar list.
+ *
+ * Each relation is opened once, even if it is named more than once or is
+ * reached both directly and as an inheritance child.  When a RangeVar has
+ * its inh flag set, all of its inheritors are opened as well.
+ *
+ * The returned tables are locked in ShareUpdateExclusiveLock mode.  The
+ * caller closes them with CloseTableList().
+ */
+static List *
+OpenTableList(List *tables)
+{
+    List       *relids = NIL;
+    List       *rels = NIL;
+    ListCell   *lc;
+
+    /*
+     * Open, share-lock, and check all the explicitly-specified relations
+     */
+    foreach(lc, tables)
+    {
+        RangeVar   *rv = lfirst(lc);
+        Relation    rel;
+        bool        recurse = rv->inh;
+        Oid         myrelid;
+
+        CHECK_FOR_INTERRUPTS();
+
+        rel = heap_openrv(rv, ShareUpdateExclusiveLock);
+        myrelid = RelationGetRelid(rel);
+
+        /*
+         * Filter out duplicates when the user specifies "foo, foo".
+         * Note that this algorithm is known to not be very efficient
+         * (O(N^2)), but given that it only works on the list of tables
+         * given to us by the user it's deemed acceptable.
+         */
+        if (list_member_oid(relids, myrelid))
+        {
+            heap_close(rel, ShareUpdateExclusiveLock);
+            continue;
+        }
+        rels = lappend(rels, rel);
+        relids = lappend_oid(relids, myrelid);
+
+        if (recurse)
+        {
+            ListCell   *child;
+            List       *children;
+
+            children = find_all_inheritors(myrelid, ShareUpdateExclusiveLock,
+                                           NULL);
+
+            foreach(child, children)
+            {
+                Oid         childrelid = lfirst_oid(child);
+
+                /*
+                 * Skip duplicates if user specified both parent and child
+                 * tables.  This also skips the parent itself, which
+                 * find_all_inheritors() includes in its result.  The child
+                 * has not been opened at this point, so there is nothing
+                 * to close here.
+                 */
+                if (list_member_oid(relids, childrelid))
+                    continue;
+
+                /* find_all_inheritors already got lock */
+                rel = heap_open(childrelid, NoLock);
+                rels = lappend(rels, rel);
+                relids = lappend_oid(relids, childrelid);
+            }
+        }
+    }
+
+    list_free(relids);
+
+    return rels;
+}
+
+/*
+ * Close all relations in the list.
+ *
+ * The relations are closed with NoLock, i.e. without releasing the locks
+ * they were opened with.
+ */
+static void
+CloseTableList(List *rels)
+{
+    ListCell   *lc;
+
+    foreach(lc, rels)
+    {
+        Relation    rel = (Relation) lfirst(lc);
+
+        heap_close(rel, NoLock);
+    }
+}
+
+/*
+ * Add listed tables to the publication.
+ *
+ * The current user must own every listed table.  When stmt is non-NULL,
+ * each addition is also reported to event triggers and to the object
+ * post-create hook.
+ */
+static void
+PublicationAddTables(Oid pubid, List *rels, bool if_not_exists,
+                     AlterPublicationStmt *stmt)
+{
+    ListCell   *lc;
+
+    Assert(!stmt || !stmt->for_all_tables);
+
+    foreach(lc, rels)
+    {
+        Relation    rel = (Relation) lfirst(lc);
+        ObjectAddress obj;
+
+        /* Must be owner of the table or superuser. */
+        if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
+            aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
+                           RelationGetRelationName(rel));
+
+        obj = publication_add_relation(pubid, rel, if_not_exists);
+        if (stmt)
+        {
+            EventTriggerCollectSimpleCommand(obj, InvalidObjectAddress,
+                                             (Node *) stmt);
+
+            InvokeObjectPostCreateHook(PublicationRelRelationId,
+                                       obj.objectId, 0);
+        }
+    }
+}
+
+/*
+ * Remove listed tables from the publication. 
+ */ +static void +PublicationDropTables(Oid pubid, List *rels, bool missing_ok) +{ + ObjectAddress obj; + ListCell *lc; + Oid prid; + + foreach(lc, rels) + { + Relation rel = (Relation) lfirst(lc); + Oid relid = RelationGetRelid(rel); + + prid = GetSysCacheOid2(PUBLICATIONRELMAP, ObjectIdGetDatum(relid), + ObjectIdGetDatum(pubid)); + if (!OidIsValid(prid)) + { + if (missing_ok) + continue; /* no mapping for this table; skip it silently */ + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("relation \"%s\" is not part of the publication", + RelationGetRelationName(rel)))); + } + + ObjectAddressSet(obj, PublicationRelRelationId, prid); + performDeletion(&obj, DROP_CASCADE, 0); + } +} + +/* + * Internal workhorse for changing a publication owner. + * + * Does nothing if the owner is unchanged. Otherwise the current user must + * own the publication and the new owner must be a superuser; 'tup' is + * modified in place and written back to 'rel', and the owner dependency is + * updated to match. + */ + static void +AlterPublicationOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_publication form; + + form = (Form_pg_publication) GETSTRUCT(tup); + + if (form->pubowner == newOwnerId) + return; + + if (!pg_publication_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_PUBLICATION, + NameStr(form->pubname)); + + /* New owner must be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of publication \"%s\"", + NameStr(form->pubname)), + errhint("The owner of a publication must be a superuser."))); + + form->pubowner = newOwnerId; + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(PublicationRelationId, + HeapTupleGetOid(tup), + newOwnerId); + + InvokeObjectPostAlterHook(PublicationRelationId, + HeapTupleGetOid(tup), 0); +} + +/* + * Change publication owner -- by name + * + * Returns the address of the publication. + */ +ObjectAddress +AlterPublicationOwner(const char *name, Oid newOwnerId) +{ + Oid subid; /* OID of the publication, despite the name */ + HeapTuple tup; + Relation rel; + ObjectAddress address; + + rel = heap_open(PublicationRelationId, RowExclusiveLock); + + tup = 
SearchSysCacheCopy1(PUBLICATIONNAME, CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication \"%s\" does not exist", name))); + + subid = HeapTupleGetOid(tup); + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, PublicationRelationId, subid); + + heap_freetuple(tup); + + heap_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change publication owner -- by OID + * + * 'subid' is the OID of the publication. + */ +void +AlterPublicationOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = heap_open(PublicationRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(PUBLICATIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("publication with OID %u does not exist", subid))); + + AlterPublicationOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + heap_close(rel, RowExclusiveLock); +} diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c new file mode 100644 index 0000000000..1448ee3bee --- /dev/null +++ b/src/backend/commands/subscriptioncmds.c @@ -0,0 +1,643 @@ +/*------------------------------------------------------------------------- + * + * subscriptioncmds.c + * subscription catalog manipulation functions + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/commands/subscriptioncmds.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" + +#include "access/heapam.h" +#include "access/htup_details.h" + +#include "catalog/indexing.h" +#include "catalog/objectaccess.h" +#include "catalog/objectaddress.h" +#include "catalog/pg_type.h" +#include "catalog/pg_subscription.h" + +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/subscriptioncmds.h" + +#include 
"replication/logicallauncher.h" +#include "replication/origin.h" +#include "replication/walreceiver.h" +#include "replication/worker_internal.h" + +#include "storage/lmgr.h" + +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/syscache.h" + +/* + * Common option parsing function for CREATE and ALTER SUBSCRIPTION commands. + * + * Since not all options can be specified in both commands, the caller + * passes NULL as the output pointer of any option it does not accept; such + * an option is then not matched and is reported through the final + * "unrecognized option" error. + * + * Before parsing, every non-NULL output is set to its default: conninfo and + * slot_name to NULL, publications to NIL, enabled and create_slot to true. + * enabled_given must be non-NULL whenever enabled is non-NULL, because it + * is written together with it. Giving the same option twice (or both + * spellings of a boolean option) raises a syntax error. + */ +static void +parse_subscription_options(List *options, char **conninfo, + List **publications, bool *enabled_given, + bool *enabled, bool *create_slot, char **slot_name) +{ + ListCell *lc; + bool create_slot_given = false; + + if (conninfo) + *conninfo = NULL; + if (publications) + *publications = NIL; + if (enabled) + { + *enabled_given = false; + *enabled = true; + } + if (create_slot) + *create_slot = true; + if (slot_name) + *slot_name = NULL; + + /* Parse options */ + foreach (lc, options) + { + DefElem *defel = (DefElem *) lfirst(lc); + + if (strcmp(defel->defname, "conninfo") == 0 && conninfo) + { + if (*conninfo) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *conninfo = defGetString(defel); + } + else if (strcmp(defel->defname, "publication") == 0 && publications) + { + if (*publications) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *publications = defGetStringList(defel); + } + else if (strcmp(defel->defname, "enabled") == 0 && enabled) + { + if (*enabled_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *enabled_given = true; + *enabled = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "disabled") == 0 && enabled) + { + if (*enabled_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + 
*enabled_given = true; + *enabled = !defGetBoolean(defel); + } + else if (strcmp(defel->defname, "create slot") == 0 && create_slot) + { + if (create_slot_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + create_slot_given = true; + *create_slot = defGetBoolean(defel); + } + else if (strcmp(defel->defname, "nocreate slot") == 0 && create_slot) + { + if (create_slot_given) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + create_slot_given = true; + *create_slot = !defGetBoolean(defel); + } + else if (strcmp(defel->defname, "slot name") == 0 && slot_name) + { + if (*slot_name) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"))); + + *slot_name = defGetString(defel); + } + else + elog(ERROR, "unrecognized option: %s", defel->defname); + } +} + +/* + * Auxiliary function to return a text array out of a list of String nodes. + * + * The per-element datums are built in a private memory context; the + * resulting array is constructed after switching back to the caller's + * context. + */ +static Datum +publicationListToArray(List *publist) +{ + ArrayType *arr; + Datum *datums; + int j = 0; + ListCell *cell; + MemoryContext memcxt; + MemoryContext oldcxt; + + /* Create memory context for temporary allocations. */ + memcxt = AllocSetContextCreate(CurrentMemoryContext, + "publicationListToArray to array", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcxt = MemoryContextSwitchTo(memcxt); + + datums = palloc(sizeof(text *) * list_length(publist)); + foreach(cell, publist) + { + char *name = strVal(lfirst(cell)); + ListCell *pcell; + + /* Check for duplicates. 
*/ + foreach(pcell, publist) + { + char *pname = strVal(lfirst(cell)); + + if (name == pname) + break; + + if (strcmp(name, pname) == 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("publication name \"%s\" used more than once", + pname))); + } + + datums[j++] = CStringGetTextDatum(name); + } + + MemoryContextSwitchTo(oldcxt); + + arr = construct_array(datums, list_length(publist), + TEXTOID, -1, false, 'i'); + MemoryContextDelete(memcxt); + + return PointerGetDatum(arr); +} + +/* + * Create new subscription. + */ +ObjectAddress +CreateSubscription(CreateSubscriptionStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + Oid subid; + bool nulls[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + HeapTuple tup; + bool enabled_given; + bool enabled; + char *conninfo; + char *slotname; + char originname[NAMEDATALEN]; + bool create_slot; + List *publications; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to create subscriptions")))); + + rel = heap_open(SubscriptionRelationId, RowExclusiveLock); + + /* Check if name is used */ + subid = GetSysCacheOid2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + if (OidIsValid(subid)) + { + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("subscription \"%s\" already exists", + stmt->subname))); + } + + /* + * Parse and check options. + * Connection and publication should not be specified here. + */ + parse_subscription_options(stmt->options, NULL, NULL, + &enabled_given, &enabled, + &create_slot, &slotname); + if (slotname == NULL) + slotname = stmt->subname; + + conninfo = stmt->conninfo; + publications = stmt->publication; + + /* Load the library providing us libpq calls. */ + load_file("libpqwalreceiver", false); + + /* Check the connection info string. */ + walrcv_check_conninfo(conninfo); + + /* Everything ok, form a new tuple. 
*/ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + + values[Anum_pg_subscription_subdbid - 1] = ObjectIdGetDatum(MyDatabaseId); + values[Anum_pg_subscription_subname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(stmt->subname)); + values[Anum_pg_subscription_subowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled); + values[Anum_pg_subscription_subconninfo - 1] = + CStringGetTextDatum(conninfo); + values[Anum_pg_subscription_subslotname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(slotname)); + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(publications); + + tup = heap_form_tuple(RelationGetDescr(rel), values, nulls); + + /* Insert tuple into catalog; the returned OID identifies the subscription. */ + subid = simple_heap_insert(rel, tup); + CatalogUpdateIndexes(rel, tup); + heap_freetuple(tup); + + snprintf(originname, sizeof(originname), "pg_%u", subid); /* origin is named after the subscription OID */ + replorigin_create(originname); + + /* + * If requested, create the replication slot on remote side for our + * newly created subscription. Failure to connect to the publisher is + * reported as an ERROR. + */ + if (create_slot) + { + XLogRecPtr lsn; + char *err; + WalReceiverConn *wrconn; + + /* Try to connect to the publisher. */ + wrconn = walrcv_connect(conninfo, true, stmt->subname, &err); + if (!wrconn) + ereport(ERROR, + (errmsg("could not connect to the publisher: %s", err))); + + walrcv_create_slot(wrconn, slotname, false, &lsn); + ereport(NOTICE, + (errmsg("created replication slot \"%s\" on publisher", + slotname))); + + /* And we are done with the remote side. */ + walrcv_disconnect(wrconn); + } + + heap_close(rel, RowExclusiveLock); + + ApplyLauncherWakeupAtCommit(); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + + InvokeObjectPostCreateHook(SubscriptionRelationId, subid, 0); + + return myself; +} + +/* + * Alter the existing subscription. 
+ */ +ObjectAddress +AlterSubscription(AlterSubscriptionStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + bool nulls[Natts_pg_subscription]; + bool replaces[Natts_pg_subscription]; + Datum values[Natts_pg_subscription]; + HeapTuple tup; + Oid subid; + bool enabled_given; + bool enabled; + char *conninfo; + char *slot_name; + List *publications; + + rel = heap_open(SubscriptionRelationId, RowExclusiveLock); + + /* Fetch the existing tuple (a private copy that we may modify). */ + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + + /* must be owner */ + if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION, + stmt->subname); + + subid = HeapTupleGetOid(tup); + + /* + * Parse options. No create_slot output is passed, so the slot creation + * options are not accepted here. + * + * NOTE(review): unlike CREATE SUBSCRIPTION, a new conninfo does not + * appear to be validated before it is stored -- confirm whether that is + * intended. + */ + parse_subscription_options(stmt->options, &conninfo, &publications, + &enabled_given, &enabled, + NULL, &slot_name); + + /* + * Form a new tuple. Only the columns whose option was actually given + * are flagged in 'replaces'; all other columns keep their old values. 
*/ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + if (enabled_given) + { + values[Anum_pg_subscription_subenabled - 1] = BoolGetDatum(enabled); + replaces[Anum_pg_subscription_subenabled - 1] = true; + } + if (conninfo) + { + values[Anum_pg_subscription_subconninfo - 1] = + CStringGetTextDatum(conninfo); + replaces[Anum_pg_subscription_subconninfo - 1] = true; + } + if (slot_name) + { + values[Anum_pg_subscription_subslotname - 1] = + DirectFunctionCall1(namein, CStringGetDatum(slot_name)); + replaces[Anum_pg_subscription_subslotname - 1] = true; + } + if (publications != NIL) + { + values[Anum_pg_subscription_subpublications - 1] = + publicationListToArray(publications); + replaces[Anum_pg_subscription_subpublications - 1] = true; + } + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), values, nulls, + replaces); + + /* Update the catalog. */ + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + + /* Cleanup. 
*/ + heap_freetuple(tup); + heap_close(rel, RowExclusiveLock); + + InvokeObjectPostAlterHook(SubscriptionRelationId, subid, 0); + + return myself; +} + +/* + * Drop a subscription + */ +void +DropSubscription(DropSubscriptionStmt *stmt) +{ + Relation rel; + ObjectAddress myself; + HeapTuple tup; + Oid subid; + Datum datum; + bool isnull; + char *subname; + char *conninfo; + char *slotname; + char originname[NAMEDATALEN]; + char *err = NULL; + RepOriginId originid; + WalReceiverConn *wrconn = NULL; + StringInfoData cmd; + + rel = heap_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCache2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(stmt->subname)); + + if (!HeapTupleIsValid(tup)) + { + heap_close(rel, NoLock); + + if (!stmt->missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", + stmt->subname))); + else + ereport(NOTICE, + (errmsg("subscription \"%s\" does not exist, skipping", + stmt->subname))); + + return; + } + + subid = HeapTupleGetOid(tup); + + /* must be owner */ + if (!pg_subscription_ownercheck(subid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION, + stmt->subname); + + /* DROP hook for the subscription being removed */ + InvokeObjectDropHook(SubscriptionRelationId, subid, 0); + + /* + * Lock the subscription so noboby else can do anything with it + * (including the replication workers). 
+ */ + LockSharedObject(SubscriptionRelationId, subid, 0, AccessExclusiveLock); + + /* Get subname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subname, &isnull); + Assert(!isnull); + subname = pstrdup(NameStr(*DatumGetName(datum))); + + /* Get conninfo */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subconninfo, &isnull); + Assert(!isnull); + conninfo = pstrdup(TextDatumGetCString(datum)); + + /* Get slotname */ + datum = SysCacheGetAttr(SUBSCRIPTIONOID, tup, + Anum_pg_subscription_subslotname, &isnull); + Assert(!isnull); + slotname = pstrdup(NameStr(*DatumGetName(datum))); + + ObjectAddressSet(myself, SubscriptionRelationId, subid); + EventTriggerSQLDropAddObject(&myself, true, true); + + /* Remove the tuple from catalog. */ + simple_heap_delete(rel, &tup->t_self); + + ReleaseSysCache(tup); + + /* Protect against launcher restarting the worker. */ + LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE); + + /* Kill the apply worker so that the slot becomes accessible. */ + logicalrep_worker_stop(subid); + + /* Remove the origin tracking if exists. */ + snprintf(originname, sizeof(originname), "pg_%u", subid); + originid = replorigin_by_name(originname, true); + if (originid != InvalidRepOriginId) + replorigin_drop(originid); + + /* If the user asked to not drop the slot, we are done mow.*/ + if (!stmt->drop_slot) + { + heap_close(rel, NoLock); + return; + } + + /* + * Otherwise drop the replication slot at the publisher node using + * the replication connection. 
+ */ + load_file("libpqwalreceiver", false); + + initStringInfo(&cmd); + appendStringInfo(&cmd, "DROP_REPLICATION_SLOT \"%s\"", slotname); + + wrconn = walrcv_connect(conninfo, true, subname, &err); + if (wrconn == NULL) + ereport(ERROR, + (errmsg("could not connect to publisher when attempting to " + "drop the replication slot \"%s\"", slotname), + errdetail("The error was: %s", err))); + + if (!walrcv_command(wrconn, cmd.data, &err)) + ereport(ERROR, + (errmsg("count not drop the replication slot \"%s\" on publisher", + slotname), + errdetail("The error was: %s", err))); + else + ereport(NOTICE, + (errmsg("dropped replication slot \"%s\" on publisher", + slotname))); + + walrcv_disconnect(wrconn); + + pfree(cmd.data); + + heap_close(rel, NoLock); +} + +/* + * Internal workhorse for changing a subscription owner + */ +static void +AlterSubscriptionOwner_internal(Relation rel, HeapTuple tup, Oid newOwnerId) +{ + Form_pg_subscription form; + + form = (Form_pg_subscription) GETSTRUCT(tup); + + if (form->subowner == newOwnerId) + return; + + if (!pg_subscription_ownercheck(HeapTupleGetOid(tup), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_SUBSCRIPTION, + NameStr(form->subname)); + + /* New owner must be a superuser */ + if (!superuser_arg(newOwnerId)) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to change owner of subscription \"%s\"", + NameStr(form->subname)), + errhint("The owner of an subscription must be a superuser."))); + + form->subowner = newOwnerId; + simple_heap_update(rel, &tup->t_self, tup); + CatalogUpdateIndexes(rel, tup); + + /* Update owner dependency reference */ + changeDependencyOnOwner(SubscriptionRelationId, + HeapTupleGetOid(tup), + newOwnerId); + + InvokeObjectPostAlterHook(SubscriptionRelationId, + HeapTupleGetOid(tup), 0); +} + +/* + * Change subscription owner -- by name + */ +ObjectAddress +AlterSubscriptionOwner(const char *name, Oid newOwnerId) +{ + Oid subid; + HeapTuple tup; + 
Relation rel; + ObjectAddress address; + + rel = heap_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy2(SUBSCRIPTIONNAME, MyDatabaseId, + CStringGetDatum(name)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription \"%s\" does not exist", name))); + + subid = HeapTupleGetOid(tup); + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + ObjectAddressSet(address, SubscriptionRelationId, subid); + + heap_freetuple(tup); + + heap_close(rel, RowExclusiveLock); + + return address; +} + +/* + * Change subscription owner -- by OID + * + * Raises an error if no subscription with the given OID exists. + */ +void +AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId) +{ + HeapTuple tup; + Relation rel; + + rel = heap_open(SubscriptionRelationId, RowExclusiveLock); + + tup = SearchSysCacheCopy1(SUBSCRIPTIONOID, ObjectIdGetDatum(subid)); + + if (!HeapTupleIsValid(tup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("subscription with OID %u does not exist", subid))); + + AlterSubscriptionOwner_internal(rel, tup, newOwnerId); + + heap_freetuple(tup); + + heap_close(rel, RowExclusiveLock); +} diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6ed2a3dc4d..c4b0011bdd 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -12055,6 +12055,18 @@ ATPrepChangePersistence(Relation rel, bool toLogged) break; } + /* + * Check that the table is not part of any publication when changing to + * UNLOGGED as UNLOGGED tables can't be published. 
+ */ + if (!toLogged && + list_length(GetRelationPublications(RelationGetRelid(rel))) > 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot change table \"%s\" to unlogged because it is part of a publication", + RelationGetRelationName(rel)), + errdetail("Unlogged relations cannot be replicated."))); + /* * Check existing foreign key constraints to preserve the invariant that * permanent tables cannot reference unlogged ones. Self-referencing diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index c51415830a..2a2b7eb9bd 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global OBJS = execAmi.o execCurrent.o execGrouping.o execIndexing.o execJunk.o \ execMain.o execParallel.o execProcnode.o execQual.o \ - execScan.o execTuples.o \ + execReplication.o execScan.o execTuples.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o \ diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index e6edcc06c2..0bc146ca47 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -43,6 +43,7 @@ #include "access/xact.h" #include "catalog/namespace.h" #include "catalog/partition.h" +#include "catalog/pg_publication.h" #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" @@ -1024,7 +1025,7 @@ CheckValidResultRel(Relation resultRel, CmdType operation) { case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: - /* OK */ + CheckCmdReplicaIdentity(resultRel, operation); break; case RELKIND_SEQUENCE: ereport(ERROR, diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c new file mode 100644 index 0000000000..a18ae512db --- /dev/null +++ b/src/backend/executor/execReplication.c @@ -0,0 +1,553 @@ 
+/*------------------------------------------------------------------------- + * + * execReplication.c + * miscellaneous executor routines for logical replication + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/executor/execReplication.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/relscan.h" +#include "access/transam.h" +#include "access/xact.h" +#include "commands/trigger.h" +#include "executor/executor.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_relation.h" +#include "parser/parsetree.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" +#include "utils/tqual.h" + + +/* + * Set up a ScanKey for a search in the relation 'rel' for the tuple held in + * 'searchslot', which is laid out to match 'rel' (*NOT* idxrel!): the key + * values are fetched from the slot using the table attribute numbers + * recorded in the index. + * + * One equality scan key is built per index attribute, so 'skey' must have + * room for that many entries. + * + * Returns whether any key column contains NULLs. + * + * This is not a generic routine; it expects idxrel to be the replica + * identity index of rel and to meet all limitations associated with that. + */ +static bool +build_replindex_scan_key(ScanKey skey, Relation rel, Relation idxrel, + TupleTableSlot *searchslot) +{ + int attoff; + bool isnull; + Datum indclassDatum; + oidvector *opclass; + int2vector *indkey = &idxrel->rd_index->indkey; + bool hasnulls = false; + + Assert(RelationGetReplicaIndex(rel) == RelationGetRelid(idxrel)); + + indclassDatum = SysCacheGetAttr(INDEXRELID, idxrel->rd_indextuple, + Anum_pg_index_indclass, &isnull); + Assert(!isnull); + opclass = (oidvector *) DatumGetPointer(indclassDatum); + + /* Build scankey for every attribute in the index. 
*/ + for (attoff = 0; attoff < RelationGetNumberOfAttributes(idxrel); attoff++) + { + Oid operator; + Oid opfamily; + RegProcedure regop; + int pkattno = attoff + 1; + int mainattno = indkey->values[attoff]; + Oid optype = get_opclass_input_type(opclass->values[attoff]); + + /* + * Load the operator info. We need this to get the equality operator + * function for the scan key. + */ + opfamily = get_opclass_family(opclass->values[attoff]); + + operator = get_opfamily_member(opfamily, optype, + optype, + BTEqualStrategyNumber); + + if (!OidIsValid(operator)) + elog(ERROR, "could not find member %d(%u,%u) of opfamily %u", + BTEqualStrategyNumber, optype, optype, opfamily); + + regop = get_opcode(operator); + + /* Initialize the scankey (mainattno is 1-based, slot arrays are 0-based). */ + ScanKeyInit(&skey[attoff], + pkattno, + BTEqualStrategyNumber, + regop, + searchslot->tts_values[mainattno - 1]); + + /* Check for null value. */ + if (searchslot->tts_isnull[mainattno - 1]) + { + hasnulls = true; + skey[attoff].sk_flags |= SK_ISNULL; + } + } + + return hasnulls; +} + +/* + * Search the relation 'rel' for tuple using the index. + * + * If a matching tuple is found, lock it with lockmode, fill the slot with its + * contents, and return true. Return false otherwise. + * + * The scan uses a dirty snapshot. If the tuple found is being modified by + * a transaction that is still in progress, we wait for that transaction + * and rescan; the scan is also restarted when locking the tuple reports a + * concurrent update. + */ +bool +RelationFindReplTupleByIndex(Relation rel, Oid idxoid, + LockTupleMode lockmode, + TupleTableSlot *searchslot, + TupleTableSlot *outslot) +{ + HeapTuple scantuple; + ScanKeyData skey[INDEX_MAX_KEYS]; + IndexScanDesc scan; + SnapshotData snap; + TransactionId xwait; + Relation idxrel; + bool found; + + /* Open the index. */ + idxrel = index_open(idxoid, RowExclusiveLock); + + /* Start an index scan. */ + InitDirtySnapshot(snap); + scan = index_beginscan(rel, idxrel, &snap, + RelationGetNumberOfAttributes(idxrel), + 0); + + /* Build scan key. 
*/ + build_replindex_scan_key(skey, rel, idxrel, searchslot); + +retry: + found = false; + + index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0); + + /* Try to find the tuple */ + if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) + { + found = true; + ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); + ExecMaterializeSlot(outslot); + + xwait = TransactionIdIsValid(snap.xmin) ? + snap.xmin : snap.xmax; + + /* + * If the tuple is locked, wait for locking transaction to finish + * and retry. + */ + if (TransactionIdIsValid(xwait)) + { + XactLockTableWait(xwait, NULL, NULL, XLTW_None); + goto retry; + } + } + + /* Found tuple, try to lock it in the lockmode. */ + if (found) + { + Buffer buf; + HeapUpdateFailureData hufd; + HTSU_Result res; + HeapTupleData locktup; + + ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); + + PushActiveSnapshot(GetLatestSnapshot()); + + res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), + lockmode, + false /* wait */, + false /* don't follow updates */, + &buf, &hufd); + /* the tuple slot already has the buffer pinned */ + ReleaseBuffer(buf); + + PopActiveSnapshot(); + + switch (res) + { + case HeapTupleMayBeUpdated: + break; + case HeapTupleUpdated: + /* XXX: Improve handling here */ + ereport(LOG, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("concurrent update, retrying"))); + goto retry; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: + elog(ERROR, "unexpected heap_lock_tuple status: %u", res); + break; + } + } + + index_endscan(scan); + + /* Don't release lock until commit. */ + index_close(idxrel, NoLock); + + return found; +} + +/* + * Compare the tuple and slot and check if they have equal values. 
+ * + * We use binary datum comparison which might return false negatives but + * that's the best we can do here as there may be multiple notions of + * equality for the data types and table columns don't specify which one + * to use. + * + * 'desc' describes the layout of both 'tup' and 'slot'; every attribute is + * compared. + */ +static bool +tuple_equals_slot(TupleDesc desc, HeapTuple tup, TupleTableSlot *slot) +{ + Datum values[MaxTupleAttributeNumber]; + bool isnull[MaxTupleAttributeNumber]; + int attrnum; + Form_pg_attribute att; + + heap_deform_tuple(tup, desc, values, isnull); + + /* Check equality of the attributes. */ + for (attrnum = 0; attrnum < desc->natts; attrnum++) + { + /* + * If one value is NULL and other is not, then they are certainly not + * equal + */ + if (isnull[attrnum] != slot->tts_isnull[attrnum]) + return false; + + /* + * If both are NULL, they can be considered equal. + */ + if (isnull[attrnum]) + continue; + + att = desc->attrs[attrnum]; + if (!datumIsEqual(values[attrnum], slot->tts_values[attrnum], + att->attbyval, att->attlen)) + return false; + } + + return true; +} + +/* + * Search the relation 'rel' for tuple using the sequential scan. + * + * If a matching tuple is found, lock it with lockmode, fill the slot with its + * contents, and return true. Return false otherwise. + * + * Note that this stops on the first matching tuple. + * + * The scan uses a dirty snapshot. If the matching tuple is being modified + * by a transaction that is still in progress, we wait for that transaction + * and rescan from the start; the scan is also restarted when locking the + * tuple reports a concurrent update. + * + * This can obviously be quite slow on tables that have more than a few rows. + */ +bool +RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, + TupleTableSlot *searchslot, TupleTableSlot *outslot) +{ + HeapTuple scantuple; + HeapScanDesc scan; + SnapshotData snap; + TransactionId xwait; + bool found; + TupleDesc desc = RelationGetDescr(rel); + + Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor)); + + /* Start a heap scan. 
*/ + InitDirtySnapshot(snap); + scan = heap_beginscan(rel, &snap, 0, NULL); + +retry: + found = false; + + heap_rescan(scan, NULL); + + /* Try to find the tuple */ + while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + if (!tuple_equals_slot(desc, scantuple, searchslot)) + continue; + + found = true; + ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); + ExecMaterializeSlot(outslot); + + xwait = TransactionIdIsValid(snap.xmin) ? + snap.xmin : snap.xmax; + + /* + * If the tuple is locked, wait for locking transaction to finish + * and retry. + */ + if (TransactionIdIsValid(xwait)) + { + XactLockTableWait(xwait, NULL, NULL, XLTW_None); + goto retry; + } + + /* + * Found our tuple and it's not locked: stop here, so that the rest + * of the table is not scanned and a later matching row cannot + * overwrite outslot. + */ + break; + } + + /* Found tuple, try to lock it in the lockmode. */ + if (found) + { + Buffer buf; + HeapUpdateFailureData hufd; + HTSU_Result res; + HeapTupleData locktup; + + ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); + + PushActiveSnapshot(GetLatestSnapshot()); + + res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), + lockmode, + false /* wait */, + false /* don't follow updates */, + &buf, &hufd); + /* the tuple slot already has the buffer pinned */ + ReleaseBuffer(buf); + + PopActiveSnapshot(); + + switch (res) + { + case HeapTupleMayBeUpdated: + break; + case HeapTupleUpdated: + /* XXX: Improve handling here */ + ereport(LOG, + (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), + errmsg("concurrent update, retrying"))); + goto retry; + case HeapTupleInvisible: + elog(ERROR, "attempted to lock invisible tuple"); + default: + elog(ERROR, "unexpected heap_lock_tuple status: %u", res); + break; + } + } + + heap_endscan(scan); + + return found; +} + +/* + * Insert tuple represented in the slot to the relation, update the indexes, + * and execute any constraints and per-row triggers. + * + * Caller is responsible for opening the indexes. 
+ */ +void +ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot) +{ + bool skip_tuple = false; + HeapTuple tuple; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + Relation rel = resultRelInfo->ri_RelationDesc; + + /* For now we support only tables. */ + Assert(rel->rd_rel->relkind == RELKIND_RELATION); + + CheckCmdReplicaIdentity(rel, CMD_INSERT); + + /* BEFORE ROW INSERT Triggers */ + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_insert_before_row) + { + slot = ExecBRInsertTriggers(estate, resultRelInfo, slot); + + if (slot == NULL) /* "do nothing" */ + skip_tuple = true; + } + + if (!skip_tuple) + { + List *recheckIndexes = NIL; + + /* Check the constraints of the tuple */ + if (rel->rd_att->constr) + ExecConstraints(resultRelInfo, slot, slot, estate); + + /* Materialize the slot into a tuple that we can insert. */ + tuple = ExecMaterializeSlot(slot); + + /* OK, store the tuple and create index entries for it */ + simple_heap_insert(rel, tuple); + + if (resultRelInfo->ri_NumIndices > 0) + recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), + estate, false, NULL, + NIL); + + /* AFTER ROW INSERT Triggers */ + ExecARInsertTriggers(estate, resultRelInfo, tuple, + recheckIndexes); + + list_free(recheckIndexes); + } +} + +/* + * Find the searchslot tuple and update it with data in the slot, + * update the indexes, and execute any constraints and per-row triggers. + * + * Caller is responsible for opening the indexes. + */ +void +ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, + TupleTableSlot *searchslot, TupleTableSlot *slot) +{ + bool skip_tuple = false; + HeapTuple tuple; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + Relation rel = resultRelInfo->ri_RelationDesc; + + /* For now we support only tables.
*/ + Assert(rel->rd_rel->relkind == RELKIND_RELATION); + + CheckCmdReplicaIdentity(rel, CMD_UPDATE); + + /* BEFORE ROW UPDATE Triggers */ + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_update_before_row) + { + slot = ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, + &searchslot->tts_tuple->t_self, + NULL, slot); + + if (slot == NULL) /* "do nothing" */ + skip_tuple = true; + } + + if (!skip_tuple) + { + List *recheckIndexes = NIL; + + /* Check the constraints of the tuple */ + if (rel->rd_att->constr) + ExecConstraints(resultRelInfo, slot, slot, estate); + + /* Materialize the slot into a tuple that we can write. */ + tuple = ExecMaterializeSlot(slot); + + /* OK, update the tuple and index entries for it */ + simple_heap_update(rel, &searchslot->tts_tuple->t_self, + slot->tts_tuple); + + if (resultRelInfo->ri_NumIndices > 0 && + !HeapTupleIsHeapOnly(slot->tts_tuple)) + recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self), + estate, false, NULL, + NIL); + + /* AFTER ROW UPDATE Triggers */ + ExecARUpdateTriggers(estate, resultRelInfo, + &searchslot->tts_tuple->t_self, + NULL, tuple, recheckIndexes); + + list_free(recheckIndexes); + } +} + +/* + * Find the searchslot tuple and delete it, and execute any constraints + * and per-row triggers. + * + * Caller is responsible for opening the indexes. + */ +void +ExecSimpleRelationDelete(EState *estate, EPQState *epqstate, + TupleTableSlot *searchslot) +{ + bool skip_tuple = false; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + Relation rel = resultRelInfo->ri_RelationDesc; + + /* For now we support only tables.
*/ + Assert(rel->rd_rel->relkind == RELKIND_RELATION); + + CheckCmdReplicaIdentity(rel, CMD_DELETE); + + /* BEFORE ROW DELETE Triggers (must test the delete flag, not the update one) */ + if (resultRelInfo->ri_TrigDesc && + resultRelInfo->ri_TrigDesc->trig_delete_before_row) + { + skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, + &searchslot->tts_tuple->t_self, + NULL); + } + + if (!skip_tuple) + { + List *recheckIndexes = NIL; + + /* OK, delete the tuple */ + simple_heap_delete(rel, &searchslot->tts_tuple->t_self); + + /* AFTER ROW DELETE Triggers */ + ExecARDeleteTriggers(estate, resultRelInfo, + &searchslot->tts_tuple->t_self, NULL); + + list_free(recheckIndexes); + } +} + +/* + * Check if command can be executed with current replica identity. + */ +void +CheckCmdReplicaIdentity(Relation rel, CmdType cmd) +{ + PublicationActions *pubactions; + + /* We only need to do checks for UPDATE and DELETE. */ + if (cmd != CMD_UPDATE && cmd != CMD_DELETE) + return; + + /* If relation has replica identity we are always good. */ + if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL || + OidIsValid(RelationGetReplicaIndex(rel))) + return; + + /* + * This is either UPDATE OR DELETE and there is no replica identity. + * + * Check if the table publishes UPDATES or DELETES.
+ */ + pubactions = GetRelationPublicationActions(rel); + if (cmd == CMD_UPDATE && pubactions->pubupdate) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot update table \"%s\" because it does not have replica identity and publishes updates", + RelationGetRelationName(rel)), + errhint("To enable updating the table, set REPLICA IDENTITY using ALTER TABLE."))); + else if (cmd == CMD_DELETE && pubactions->pubdelete) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("cannot delete from table \"%s\" because it does not have replica identity and publishes deletes", + RelationGetRelationName(rel)), + errhint("To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE."))); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index f871e9d4bb..30d733e57a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4286,6 +4286,69 @@ _copyPartitionCmd(const PartitionCmd *from) return newnode; } +static CreatePublicationStmt * +_copyCreatePublicationStmt(const CreatePublicationStmt *from) +{ + CreatePublicationStmt *newnode = makeNode(CreatePublicationStmt); + + COPY_STRING_FIELD(pubname); + COPY_NODE_FIELD(options); + COPY_NODE_FIELD(tables); + COPY_SCALAR_FIELD(for_all_tables); + + return newnode; +} + +static AlterPublicationStmt * +_copyAlterPublicationStmt(const AlterPublicationStmt *from) +{ + AlterPublicationStmt *newnode = makeNode(AlterPublicationStmt); + + COPY_STRING_FIELD(pubname); + COPY_NODE_FIELD(options); + COPY_NODE_FIELD(tables); + COPY_SCALAR_FIELD(for_all_tables); + COPY_SCALAR_FIELD(tableAction); + + return newnode; +} + +static CreateSubscriptionStmt * +_copyCreateSubscriptionStmt(const CreateSubscriptionStmt *from) +{ + CreateSubscriptionStmt *newnode = makeNode(CreateSubscriptionStmt); + + COPY_STRING_FIELD(subname); + COPY_STRING_FIELD(conninfo); + COPY_NODE_FIELD(publication); + COPY_NODE_FIELD(options); + + return 
newnode; +} + +static AlterSubscriptionStmt * +_copyAlterSubscriptionStmt(const AlterSubscriptionStmt *from) +{ + AlterSubscriptionStmt *newnode = makeNode(AlterSubscriptionStmt); + + COPY_STRING_FIELD(subname); + COPY_NODE_FIELD(options); + + return newnode; +} + +static DropSubscriptionStmt * +_copyDropSubscriptionStmt(const DropSubscriptionStmt *from) +{ + DropSubscriptionStmt *newnode = makeNode(DropSubscriptionStmt); + + COPY_STRING_FIELD(subname); + COPY_SCALAR_FIELD(drop_slot); + COPY_SCALAR_FIELD(missing_ok); + + return newnode; +} + /* **************************************************************** * pg_list.h copy functions * **************************************************************** @@ -5086,6 +5149,21 @@ copyObject(const void *from) case T_AlterPolicyStmt: retval = _copyAlterPolicyStmt(from); break; + case T_CreatePublicationStmt: + retval = _copyCreatePublicationStmt(from); + break; + case T_AlterPublicationStmt: + retval = _copyAlterPublicationStmt(from); + break; + case T_CreateSubscriptionStmt: + retval = _copyCreateSubscriptionStmt(from); + break; + case T_AlterSubscriptionStmt: + retval = _copyAlterSubscriptionStmt(from); + break; + case T_DropSubscriptionStmt: + retval = _copyDropSubscriptionStmt(from); + break; case T_A_Expr: retval = _copyAExpr(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 78ed3c773e..55c73b7292 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2134,6 +2134,64 @@ _equalAlterTSConfigurationStmt(const AlterTSConfigurationStmt *a, return true; } +static bool +_equalCreatePublicationStmt(const CreatePublicationStmt *a, + const CreatePublicationStmt *b) +{ + COMPARE_STRING_FIELD(pubname); + COMPARE_NODE_FIELD(options); + COMPARE_NODE_FIELD(tables); + COMPARE_SCALAR_FIELD(for_all_tables); + + return true; +} + +static bool +_equalAlterPublicationStmt(const AlterPublicationStmt *a, + const AlterPublicationStmt *b) +{ + 
COMPARE_STRING_FIELD(pubname); + COMPARE_NODE_FIELD(options); + COMPARE_NODE_FIELD(tables); + COMPARE_SCALAR_FIELD(for_all_tables); + COMPARE_SCALAR_FIELD(tableAction); + + return true; +} + +static bool +_equalCreateSubscriptionStmt(const CreateSubscriptionStmt *a, + const CreateSubscriptionStmt *b) +{ + COMPARE_STRING_FIELD(subname); + COMPARE_STRING_FIELD(conninfo); + COMPARE_NODE_FIELD(publication); + COMPARE_NODE_FIELD(options); + + return true; +} + +static bool +_equalAlterSubscriptionStmt(const AlterSubscriptionStmt *a, + const AlterSubscriptionStmt *b) +{ + COMPARE_STRING_FIELD(subname); + COMPARE_NODE_FIELD(options); + + return true; +} + +static bool +_equalDropSubscriptionStmt(const DropSubscriptionStmt *a, + const DropSubscriptionStmt *b) +{ + COMPARE_STRING_FIELD(subname); + COMPARE_SCALAR_FIELD(drop_slot); + COMPARE_SCALAR_FIELD(missing_ok); + + return true; +} + static bool _equalCreatePolicyStmt(const CreatePolicyStmt *a, const CreatePolicyStmt *b) { @@ -3349,6 +3407,21 @@ equal(const void *a, const void *b) case T_AlterPolicyStmt: retval = _equalAlterPolicyStmt(a, b); break; + case T_CreatePublicationStmt: + retval = _equalCreatePublicationStmt(a, b); + break; + case T_AlterPublicationStmt: + retval = _equalAlterPublicationStmt(a, b); + break; + case T_CreateSubscriptionStmt: + retval = _equalCreateSubscriptionStmt(a, b); + break; + case T_AlterSubscriptionStmt: + retval = _equalAlterSubscriptionStmt(a, b); + break; + case T_DropSubscriptionStmt: + retval = _equalDropSubscriptionStmt(a, b); + break; case T_A_Expr: retval = _equalAExpr(a, b); break; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index e61ba06efe..a8e35feccc 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -280,6 +280,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); DropOwnedStmt ReassignOwnedStmt AlterTSConfigurationStmt AlterTSDictionaryStmt CreateMatViewStmt RefreshMatViewStmt CreateAmStmt + 
CreatePublicationStmt AlterPublicationStmt + CreateSubscriptionStmt AlterSubscriptionStmt DropSubscriptionStmt %type select_no_parens select_with_parens select_clause simple_select values_clause @@ -334,6 +336,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); database_name access_method_clause access_method attr_name name cursor_name file_name index_name opt_index_name cluster_index_specification + def_key %type func_name handler_name qual_Op qual_all_Op subquery_Op opt_class opt_inline_handler opt_validator validator_clause @@ -391,10 +394,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); relation_expr_list dostmt_opt_list transform_element_list transform_type_list TriggerTransitions TriggerReferencing + publication_name_list %type group_by_list %type group_by_item empty_grouping_set rollup_clause cube_clause %type grouping_sets_clause +%type opt_publication_for_tables publication_for_tables +%type publication_name_item %type opt_fdw_options fdw_options %type fdw_option @@ -407,7 +413,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type arg_class %type func_return func_type -%type opt_trusted opt_restart_seqs +%type opt_trusted opt_restart_seqs opt_drop_slot %type OptTemp %type OptNoLog %type OnCommitOption @@ -647,7 +653,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); PARALLEL PARSER PARTIAL PARTITION PASSING PASSWORD PLACING PLANS POLICY POSITION PRECEDING PRECISION PRESERVE PREPARE PREPARED PRIMARY - PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM + PRIOR PRIVILEGES PROCEDURAL PROCEDURE PROGRAM PUBLICATION QUOTE @@ -658,9 +664,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES SERIALIZABLE SERVER SESSION SESSION_USER SET SETS SETOF SHARE SHOW - SIMILAR SIMPLE SKIP SMALLINT SNAPSHOT SOME SQL_P STABLE 
STANDALONE_P START - STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P SUBSTRING - SYMMETRIC SYSID SYSTEM_P + SIMILAR SIMPLE SKIP SLOT SMALLINT SNAPSHOT SOME SQL_P STABLE STANDALONE_P + START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP_P + SUBSCRIPTION SUBSTRING SYMMETRIC SYSID SYSTEM_P TABLE TABLES TABLESAMPLE TABLESPACE TEMP TEMPLATE TEMPORARY TEXT_P THEN TIME TIMESTAMP TO TRAILING TRANSACTION TRANSFORM TREAT TRIGGER TRIM TRUE_P @@ -822,8 +828,10 @@ stmt : | AlterTableStmt | AlterTblSpcStmt | AlterCompositeTypeStmt + | AlterPublicationStmt | AlterRoleSetStmt | AlterRoleStmt + | AlterSubscriptionStmt | AlterTSConfigurationStmt | AlterTSDictionaryStmt | AlterUserMappingStmt @@ -851,12 +859,14 @@ stmt : | CreateMatViewStmt | CreateOpClassStmt | CreateOpFamilyStmt + | CreatePublicationStmt | AlterOpFamilyStmt | CreatePolicyStmt | CreatePLangStmt | CreateSchemaStmt | CreateSeqStmt | CreateStmt + | CreateSubscriptionStmt | CreateTableSpaceStmt | CreateTransformStmt | CreateTrigStmt @@ -883,6 +893,7 @@ stmt : | DropPLangStmt | DropRuleStmt | DropStmt + | DropSubscriptionStmt | DropTableSpaceStmt | DropTransformStmt | DropTrigStmt @@ -5613,16 +5624,21 @@ def_list: def_elem { $$ = list_make1($1); } | def_list ',' def_elem { $$ = lappend($1, $3); } ; -def_elem: ColLabel '=' def_arg +def_elem: def_key '=' def_arg { $$ = makeDefElem($1, (Node *) $3, @1); } - | ColLabel + | def_key { $$ = makeDefElem($1, NULL, @1); } ; +def_key: + ColLabel { $$ = $1; } + | ColLabel ColLabel { $$ = psprintf("%s %s", $1, $2); } + ; + /* Note: any simple identifier will be returned as a type name! 
*/ def_arg: func_type { $$ = (Node *)$1; } | reserved_keyword { $$ = (Node *)makeString(pstrdup($1)); } @@ -6073,6 +6089,7 @@ drop_type: TABLE { $$ = OBJECT_TABLE; } | TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; } | TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; } | TEXT_P SEARCH CONFIGURATION { $$ = OBJECT_TSCONFIGURATION; } + | PUBLICATION { $$ = OBJECT_PUBLICATION; } ; any_name_list: @@ -8933,9 +8950,237 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec n->newowner = $7; $$ = (Node *)n; } + | ALTER PUBLICATION name OWNER TO RoleSpec + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_PUBLICATION; + n->object = list_make1(makeString($3)); + n->newowner = $6; + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name OWNER TO RoleSpec + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_SUBSCRIPTION; + n->object = list_make1(makeString($3)); + n->newowner = $6; + $$ = (Node *)n; + } ; +/***************************************************************************** + * + * CREATE PUBLICATION name [ FOR TABLE ] [ WITH options ] + * + *****************************************************************************/ + +CreatePublicationStmt: + CREATE PUBLICATION name opt_publication_for_tables opt_definition + { + CreatePublicationStmt *n = makeNode(CreatePublicationStmt); + n->pubname = $3; + n->options = $5; + if ($4 != NULL) + { + /* FOR TABLE */ + if (IsA($4, List)) + n->tables = (List *)$4; + /* FOR ALL TABLES */ + else + n->for_all_tables = TRUE; + } + $$ = (Node *)n; + } + ; + +opt_publication_for_tables: + publication_for_tables { $$ = $1; } + | /* EMPTY */ { $$ = NULL; } + ; + +publication_for_tables: + FOR TABLE relation_expr_list + { + $$ = (Node *) $3; + } + | FOR ALL TABLES + { + $$ = (Node *) makeInteger(TRUE); + } + ; + +/***************************************************************************** + * + * ALTER PUBLICATION name [ WITH ] options + * + * ALTER PUBLICATION name ADD 
TABLE table [, table2] + * + * ALTER PUBLICATION name DROP TABLE table [, table2] + * + * ALTER PUBLICATION name SET TABLE table [, table2] + * + *****************************************************************************/ + +AlterPublicationStmt: + ALTER PUBLICATION name WITH definition + { + AlterPublicationStmt *n = makeNode(AlterPublicationStmt); + n->pubname = $3; + n->options = $5; + $$ = (Node *)n; + } + | ALTER PUBLICATION name ADD_P TABLE relation_expr_list + { + AlterPublicationStmt *n = makeNode(AlterPublicationStmt); + n->pubname = $3; + n->tables = $6; + n->tableAction = DEFELEM_ADD; + $$ = (Node *)n; + } + | ALTER PUBLICATION name SET TABLE relation_expr_list + { + AlterPublicationStmt *n = makeNode(AlterPublicationStmt); + n->pubname = $3; + n->tables = $6; + n->tableAction = DEFELEM_SET; + $$ = (Node *)n; + } + | ALTER PUBLICATION name DROP TABLE relation_expr_list + { + AlterPublicationStmt *n = makeNode(AlterPublicationStmt); + n->pubname = $3; + n->tables = $6; + n->tableAction = DEFELEM_DROP; + $$ = (Node *)n; + } + ; + +/***************************************************************************** + * + * CREATE SUBSCRIPTION name ... 
+ * + *****************************************************************************/ + +CreateSubscriptionStmt: + CREATE SUBSCRIPTION name CONNECTION Sconst PUBLICATION publication_name_list opt_definition + { + CreateSubscriptionStmt *n = + makeNode(CreateSubscriptionStmt); + n->subname = $3; + n->conninfo = $5; + n->publication = $7; + n->options = $8; + $$ = (Node *)n; + } + ; + +publication_name_list: + publication_name_item + { + $$ = list_make1($1); + } + | publication_name_list ',' publication_name_item + { + $$ = lappend($1, $3); + } + ; + +publication_name_item: + ColLabel { $$ = makeString($1); }; + +/***************************************************************************** + * + * ALTER SUBSCRIPTION name [ WITH ] options + * + *****************************************************************************/ + +AlterSubscriptionStmt: + ALTER SUBSCRIPTION name WITH definition + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->subname = $3; + n->options = $5; + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name CONNECTION Sconst + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->subname = $3; + n->options = list_make1(makeDefElem("conninfo", + (Node *)makeString($5), @1)); + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name SET PUBLICATION publication_name_list + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->subname = $3; + n->options = list_make1(makeDefElem("publication", + (Node *)$6, @1)); + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name ENABLE_P + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->subname = $3; + n->options = list_make1(makeDefElem("enabled", + (Node *)makeInteger(TRUE), @1)); + $$ = (Node *)n; + } + | ALTER SUBSCRIPTION name DISABLE_P + { + AlterSubscriptionStmt *n = + makeNode(AlterSubscriptionStmt); + n->subname = $3; + n->options = list_make1(makeDefElem("enabled", + (Node *)makeInteger(FALSE), @1)); + $$ = (Node *)n; + } ; + 
+/***************************************************************************** + * + * DROP SUBSCRIPTION [ IF EXISTS ] name + * + *****************************************************************************/ + +DropSubscriptionStmt: DROP SUBSCRIPTION name opt_drop_slot + { + DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt); + n->subname = $3; + n->drop_slot = $4; + n->missing_ok = false; + $$ = (Node *) n; + } + | DROP SUBSCRIPTION IF_P EXISTS name opt_drop_slot + { + DropSubscriptionStmt *n = makeNode(DropSubscriptionStmt); + n->subname = $5; + n->drop_slot = $6; + n->missing_ok = true; + $$ = (Node *) n; + } + ; + +opt_drop_slot: + IDENT SLOT + { + if (strcmp($1, "drop") == 0) + $$ = TRUE; + else if (strcmp($1, "nodrop") == 0) + $$ = FALSE; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized option \"%s\"", $1), + parser_errposition(@1))); + } + | /*EMPTY*/ { $$ = TRUE; } + ; + /***************************************************************************** * * QUERY: Define Rewrite Rule @@ -14201,6 +14446,7 @@ unreserved_keyword: | PROCEDURAL | PROCEDURE | PROGRAM + | PUBLICATION | QUOTE | RANGE | READ @@ -14244,6 +14490,7 @@ unreserved_keyword: | SHOW | SIMPLE | SKIP + | SLOT | SNAPSHOT | SQL_P | STABLE @@ -14256,6 +14503,7 @@ unreserved_keyword: | STORAGE | STRICT_P | STRIP_P + | SUBSCRIPTION | SYSID | SYSTEM_P | TABLES diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c index 61d3170b83..cd99b0b392 100644 --- a/src/backend/postmaster/bgworker.c +++ b/src/backend/postmaster/bgworker.c @@ -20,6 +20,7 @@ #include "port/atomics.h" #include "postmaster/bgworker_internals.h" #include "postmaster/postmaster.h" +#include "replication/logicallauncher.h" #include "storage/dsm.h" #include "storage/ipc.h" #include "storage/latch.h" @@ -107,6 +108,15 @@ struct BackgroundWorkerHandle static BackgroundWorkerArray *BackgroundWorkerData; +/* + * List of workers that are allowed to be started outside of + * 
shared_preload_libraries. + */ +static const bgworker_main_type InternalBGWorkers[] = { + ApplyLauncherMain, + NULL +}; + /* * Calculate shared memory needed. */ @@ -761,12 +771,23 @@ RegisterBackgroundWorker(BackgroundWorker *worker) { RegisteredBgWorker *rw; static int numworkers = 0; + bool internal = false; + int i; if (!IsUnderPostmaster) ereport(DEBUG1, (errmsg("registering background worker \"%s\"", worker->bgw_name))); - if (!process_shared_preload_libraries_in_progress) + for (i = 0; InternalBGWorkers[i]; i++) + { + if (worker->bgw_main == InternalBGWorkers[i]) + { + internal = true; + break; + } + } + + if (!process_shared_preload_libraries_in_progress && !internal) { if (!IsUnderPostmaster) ereport(LOG, diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index f37a0bfaaf..7176cf1bbe 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3303,6 +3303,12 @@ pgstat_get_wait_activity(WaitEventActivity w) case WAIT_EVENT_WAL_WRITER_MAIN: event_name = "WalWriterMain"; break; + case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN: + event_name = "LogicalLauncherMain"; + break; + case WAIT_EVENT_LOGICAL_APPLY_MAIN: + event_name = "LogicalApplyMain"; + break; /* no default case, so that compiler will warn */ } diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 5be30b0ee1..8d99c7a0d4 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -113,6 +113,7 @@ #include "postmaster/pgarch.h" #include "postmaster/postmaster.h" #include "postmaster/syslogger.h" +#include "replication/logicallauncher.h" #include "replication/walsender.h" #include "storage/fd.h" #include "storage/ipc.h" @@ -941,6 +942,14 @@ PostmasterMain(int argc, char *argv[]) } #endif + /* + * Register the apply launcher. 
Since it registers a background worker, + * it needs to be called before InitializeMaxBackends(), and it's probably + * a good idea to call it before any modules had chance to take the + * background worker slots. + */ + ApplyLauncherRegister(); + /* * process any libraries that should be preloaded at postmaster start */ diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 7671b166ed..7df3698afb 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -24,9 +24,11 @@ #include "access/xlog.h" #include "miscadmin.h" #include "pgstat.h" +#include "replication/logicalproto.h" #include "replication/walreceiver.h" #include "storage/proc.h" #include "utils/builtins.h" +#include "utils/pg_lsn.h" PG_MODULE_MAGIC; @@ -44,26 +46,35 @@ struct WalReceiverConn /* Prototypes for interface functions */ static WalReceiverConn *libpqrcv_connect(const char *conninfo, - bool logical, const char *appname); + bool logical, const char *appname, + char **err); +static void libpqrcv_check_conninfo(const char *conninfo); static char *libpqrcv_get_conninfo(WalReceiverConn *conn); static char *libpqrcv_identify_system(WalReceiverConn *conn, - TimeLineID *primary_tli); + TimeLineID *primary_tli, + int *server_version); static void libpqrcv_readtimelinehistoryfile(WalReceiverConn *conn, TimeLineID tli, char **filename, char **content, int *len); static bool libpqrcv_startstreaming(WalReceiverConn *conn, - TimeLineID tli, XLogRecPtr startpoint, - const char *slotname); + const WalRcvStreamOptions *options); static void libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli); static int libpqrcv_receive(WalReceiverConn *conn, char **buffer, pgsocket *wait_fd); static void libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes); +static char *libpqrcv_create_slot(WalReceiverConn *conn, + const char 
*slotname, + bool temporary, + XLogRecPtr *lsn); +static bool libpqrcv_command(WalReceiverConn *conn, + const char *cmd, char **err); static void libpqrcv_disconnect(WalReceiverConn *conn); static WalReceiverFunctionsType PQWalReceiverFunctions = { libpqrcv_connect, + libpqrcv_check_conninfo, libpqrcv_get_conninfo, libpqrcv_identify_system, libpqrcv_readtimelinehistoryfile, @@ -71,11 +82,14 @@ static WalReceiverFunctionsType PQWalReceiverFunctions = { libpqrcv_endstreaming, libpqrcv_receive, libpqrcv_send, + libpqrcv_create_slot, + libpqrcv_command, libpqrcv_disconnect }; /* Prototypes for private functions */ static PGresult *libpqrcv_PQexec(PGconn *streamConn, const char *query); +static char *stringlist_to_identifierstr(PGconn *conn, List *strings); /* * Module initialization function @@ -90,9 +104,12 @@ _PG_init(void) /* * Establish the connection to the primary server for XLOG streaming + * + * Returns NULL on error and fills the err with palloc'ed error message. */ static WalReceiverConn * -libpqrcv_connect(const char *conninfo, bool logical, const char *appname) +libpqrcv_connect(const char *conninfo, bool logical, const char *appname, + char **err) { WalReceiverConn *conn; const char *keys[5]; @@ -123,14 +140,34 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname) conn = palloc0(sizeof(WalReceiverConn)); conn->streamConn = PQconnectdbParams(keys, vals, /* expand_dbname = */ true); if (PQstatus(conn->streamConn) != CONNECTION_OK) - ereport(ERROR, - (errmsg("could not connect to the primary server: %s", - PQerrorMessage(conn->streamConn)))); + { + *err = pstrdup(PQerrorMessage(conn->streamConn)); + return NULL; + } + conn->logical = logical; return conn; } +/* + * Validate connection info string (just try to parse it) + */ +static void +libpqrcv_check_conninfo(const char *conninfo) +{ + PQconninfoOption *opts = NULL; + char *err = NULL; + + opts = PQconninfoParse(conninfo, &err); + if (opts == NULL) + ereport(ERROR, + 
(errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid connection string syntax: %s", err))); + + PQconninfoFree(opts); +} + /* * Return a user-displayable conninfo string. Any security-sensitive fields * are obfuscated. @@ -185,7 +222,8 @@ libpqrcv_get_conninfo(WalReceiverConn *conn) * timeline ID of the primary. */ static char * -libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli) +libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli, + int *server_version) { PGresult *res; char *primary_sysid; @@ -218,11 +256,13 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli) *primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0); PQclear(res); + *server_version = PQserverVersion(conn->streamConn); + return primary_sysid; } /* - * Start streaming WAL data from given startpoint and timeline. + * Start streaming WAL data from given streaming options. * * Returns true if we switched successfully to copy-both mode. False * means the server received the command and executed it successfully, but @@ -233,27 +273,54 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli) */ static bool libpqrcv_startstreaming(WalReceiverConn *conn, - TimeLineID tli, XLogRecPtr startpoint, - const char *slotname) + const WalRcvStreamOptions *options) { StringInfoData cmd; PGresult *res; - Assert(!conn->logical); + Assert(options->logical == conn->logical); + Assert(options->slotname || !options->logical); initStringInfo(&cmd); - /* Start streaming from the point requested by startup process */ - if (slotname != NULL) - appendStringInfo(&cmd, - "START_REPLICATION SLOT \"%s\" %X/%X TIMELINE %u", - slotname, - (uint32) (startpoint >> 32), (uint32) startpoint, - tli); + /* Build the command. 
*/ + appendStringInfoString(&cmd, "START_REPLICATION"); + if (options->slotname != NULL) + appendStringInfo(&cmd, " SLOT \"%s\"", + options->slotname); + + if (options->logical) + appendStringInfo(&cmd, " LOGICAL"); + + appendStringInfo(&cmd, " %X/%X", + (uint32) (options->startpoint >> 32), + (uint32) options->startpoint); + + /* + * Additional options are different depending on if we are doing logical + * or physical replication. + */ + if (options->logical) + { + char *pubnames_str; + List *pubnames; + + appendStringInfoString(&cmd, " ("); + appendStringInfo(&cmd, "proto_version '%u'", + options->proto.logical.proto_version); + pubnames = options->proto.logical.publication_names; + pubnames_str = stringlist_to_identifierstr(conn->streamConn, pubnames); + appendStringInfo(&cmd, ", publication_names %s", + PQescapeLiteral(conn->streamConn, pubnames_str, + strlen(pubnames_str))); + appendStringInfoChar(&cmd, ')'); + pfree(pubnames_str); + } else - appendStringInfo(&cmd, "START_REPLICATION %X/%X TIMELINE %u", - (uint32) (startpoint >> 32), (uint32) startpoint, - tli); + appendStringInfo(&cmd, " TIMELINE %u", + options->proto.physical.startpointTLI); + + /* Start streaming. */ res = libpqrcv_PQexec(conn->streamConn, cmd.data); pfree(cmd.data); @@ -577,3 +644,107 @@ libpqrcv_send(WalReceiverConn *conn, const char *buffer, int nbytes) (errmsg("could not send data to WAL stream: %s", PQerrorMessage(conn->streamConn)))); } + +/* + * Create new replication slot. + * Returns the name of the exported snapshot for logical slot or NULL for + * physical slot. 
+ */ +static char * +libpqrcv_create_slot(WalReceiverConn *conn, const char *slotname, + bool temporary, XLogRecPtr *lsn) +{ + PGresult *res; + StringInfoData cmd; + char *snapshot; + + initStringInfo(&cmd); + + appendStringInfo(&cmd, "CREATE_REPLICATION_SLOT \"%s\" ", slotname); + + if (temporary) + appendStringInfo(&cmd, "TEMPORARY "); + + if (conn->logical) + appendStringInfo(&cmd, "LOGICAL pgoutput"); + + res = libpqrcv_PQexec(conn->streamConn, cmd.data); + pfree(cmd.data); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + PQclear(res); + ereport(ERROR, + (errmsg("could not create replication slot \"%s\": %s", + slotname, PQerrorMessage(conn->streamConn)))); + } + + *lsn = DatumGetLSN(DirectFunctionCall1Coll(pg_lsn_in, InvalidOid, + CStringGetDatum(PQgetvalue(res, 0, 1)))); + if (!PQgetisnull(res, 0, 2)) + snapshot = pstrdup(PQgetvalue(res, 0, 2)); + else + snapshot = NULL; + + PQclear(res); + + return snapshot; +} + +/* + * Run command. + * + * Returns if the command has succeeded and fills the err with palloced + * error message if not. + */ +static bool +libpqrcv_command(WalReceiverConn *conn, const char *cmd, char **err) +{ + PGresult *res; + + res = libpqrcv_PQexec(conn->streamConn, cmd); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + PQclear(res); + *err = pstrdup(PQerrorMessage(conn->streamConn)); + return false; + } + + PQclear(res); + + return true; +} + +/* + * Given a List of strings, return it as single comma separated + * string, quoting identifiers as needed. + * + * This is essentially the reverse of SplitIdentifierString. + * + * The caller should free the result. 
+ */
+static char *
+stringlist_to_identifierstr(PGconn *conn, List *strings)
+{
+	ListCell   *lc;
+	StringInfoData res;
+	bool		first = true;
+
+	initStringInfo(&res);
+
+	foreach (lc, strings)
+	{
+		char	   *val = strVal(lfirst(lc));
+		char	   *quoted;
+
+		/* Separate entries with a comma, but do not emit a leading one. */
+		if (first)
+			first = false;
+		else
+			appendStringInfoChar(&res, ',');
+
+		/*
+		 * PQescapeIdentifier() hands back malloc'd memory (or NULL on
+		 * failure, with the reason stored in the connection).  Report a
+		 * failure instead of passing NULL on, and release the buffer with
+		 * PQfreemem() once its contents have been copied into the result.
+		 */
+		quoted = PQescapeIdentifier(conn, val, strlen(val));
+		if (quoted == NULL)
+			ereport(ERROR,
+					(errmsg("could not quote identifier \"%s\": %s",
+							val, PQerrorMessage(conn))));
+
+		appendStringInfoString(&res, quoted);
+		PQfreemem(quoted);
+	}
+
+	/* palloc'd buffer owned by the caller */
+	return res.data;
+}
diff --git a/src/backend/replication/logical/Makefile b/src/backend/replication/logical/Makefile
index 1d7ca062d1..259befa4e6 100644
--- a/src/backend/replication/logical/Makefile
+++ b/src/backend/replication/logical/Makefile
@@ -14,7 +14,7 @@ include $(top_builddir)/src/Makefile.global
 
 override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
 
-OBJS = decode.o logical.o logicalfuncs.o message.o origin.o reorderbuffer.o \
-	snapbuild.o
+OBJS = decode.o launcher.o logical.o logicalfuncs.o message.o origin.o \
+	proto.o relation.o reorderbuffer.o snapbuild.o worker.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
new file mode 100644
index 0000000000..b5240dcede
--- /dev/null
+++ b/src/backend/replication/logical/launcher.c
@@ -0,0 +1,759 @@
+/*-------------------------------------------------------------------------
+ * launcher.c
+ *	   PostgreSQL logical replication worker launcher process
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/replication/logical/launcher.c
+ *
+ * NOTES
+ *	  This module contains the logical replication worker launcher which
+ *	  uses the background worker infrastructure to start the logical
+ *	  replication workers for every enabled subscription.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+
+#include "access/heapam.h"
+#include "access/htup.h"
+#include "access/htup_details.h"
+#include "access/xact.h"
+
+#include "catalog/pg_subscription.h"
+
+#include "libpq/pqsignal.h"
+
+#include "postmaster/bgworker.h"
+#include "postmaster/fork_process.h"
+#include "postmaster/postmaster.h"
+
+#include "replication/logicallauncher.h"
+#include "replication/logicalworker.h"
+#include "replication/slot.h"
+#include "replication/worker_internal.h"
+
+#include "storage/ipc.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+
+#include "tcop/tcopprot.h"
+
+#include "utils/memutils.h"
+#include "utils/pg_lsn.h"
+#include "utils/ps_status.h"
+#include "utils/timeout.h"
+#include "utils/snapmgr.h"
+
+/*
+ * max sleep time between cycles (3min); used as the default WaitLatch()
+ * timeout of the launcher main loop, so the value is in milliseconds.
+ */
+#define DEFAULT_NAPTIME_PER_CYCLE 180000L
+
+/* Number of entries in the shared LogicalRepCtx->workers array. */
+int	max_logical_replication_workers = 4;
+
+/* Shared worker slot of this process; set by logicalrep_worker_attach(). */
+LogicalRepWorker *MyLogicalRepWorker = NULL;
+
+/* Shared memory state of the launcher and its workers. */
+typedef struct LogicalRepCtxStruct
+{
+	/*
+	 * Supervisor process.  Set to the launcher's pid when it starts and
+	 * reset to 0 when its main loop ends; ApplyLauncherWakeup() signals it.
+	 */
+	pid_t		launcher_pid;
+
+	/* Background workers. */
+	LogicalRepWorker workers[FLEXIBLE_ARRAY_MEMBER];
+} LogicalRepCtxStruct;
+
+/* Pointer to the shared state, set up by ApplyLauncherShmemInit(). */
+LogicalRepCtxStruct *LogicalRepCtx;
+
+static void logicalrep_worker_onexit(int code, Datum arg);
+static void logicalrep_worker_detach(void);
+
+/* Set by the SIGTERM handler; tested by the launcher main loop. */
+bool		got_SIGTERM = false;
+
+/*
+ * Set by ApplyLauncherWakeupAtCommit() and examined by
+ * AtCommit_ApplyLauncher().
+ */
+static bool	on_commit_laucher_wakeup = false;
+
+Datum pg_stat_get_subscription(PG_FUNCTION_ARGS);
+
+
+/*
+ * Load the list of subscriptions.
+ *
+ * Only the fields interesting for worker start/stop functions are filled for
+ * each subscription.
+ */
+static List *
+get_subscription_list(void)
+{
+	MemoryContext callercxt = CurrentMemoryContext;
+	List	   *sublist = NIL;
+	Relation	subrel;
+	HeapScanDesc subscan;
+
+	/*
+	 * A transaction is required to read the pg_subscription catalog.  The
+	 * snapshot itself is not used; it is taken for its side effect of
+	 * setting RecentGlobalXmin, which matters for anything that reads heap
+	 * pages because HOT may prune them even when no tuple is modified.
+	 */
+	StartTransactionCommand();
+	(void) GetTransactionSnapshot();
+
+	subrel = heap_open(SubscriptionRelationId, AccessShareLock);
+	subscan = heap_beginscan_catalog(subrel, 0, NULL);
+
+	for (;;)
+	{
+		HeapTuple	subtup;
+		Form_pg_subscription form;
+		Subscription *entry;
+		MemoryContext scancxt;
+
+		subtup = heap_getnext(subscan, ForwardScanDirection);
+		if (!HeapTupleIsValid(subtup))
+			break;
+
+		form = (Form_pg_subscription) GETSTRUCT(subtup);
+
+		/*
+		 * Build the result entry in the caller's context rather than the
+		 * transaction's.  The switch is done per tuple and undone right
+		 * after, so that leaky calls such as heap_getnext() never run in
+		 * the potentially long-lived caller context.
+		 */
+		scancxt = MemoryContextSwitchTo(callercxt);
+
+		entry = (Subscription *) palloc(sizeof(Subscription));
+		entry->oid = HeapTupleGetOid(subtup);
+		entry->dbid = form->subdbid;
+		entry->owner = form->subowner;
+		entry->enabled = form->subenabled;
+		entry->name = pstrdup(NameStr(form->subname));
+
+		/* Fields the launcher has no use for are left empty. */
+		entry->conninfo = NULL;
+		entry->slotname = NULL;
+		entry->publications = NIL;
+
+		sublist = lappend(sublist, entry);
+
+		MemoryContextSwitchTo(scancxt);
+	}
+
+	heap_endscan(subscan);
+	heap_close(subrel, AccessShareLock);
+
+	CommitTransactionCommand();
+
+	return sublist;
+}
+
+/*
+ * Wait for a background worker to start up and attach to the shmem context.
+ *
+ * This is like WaitForBackgroundWorkerStartup(), except that we wait for
+ * attaching, not just start and we also just exit if postmaster died.
+ */
+static bool
+WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
+							   BackgroundWorkerHandle *handle)
+{
+	for (;;)
+	{
+		BgwHandleStatus bgw_status;
+		pid_t		bgw_pid;
+		int			wake_events;
+
+		CHECK_FOR_INTERRUPTS();
+
+		bgw_status = GetBackgroundWorkerPid(handle, &bgw_pid);
+
+		switch (bgw_status)
+		{
+			case BGWH_STARTED:
+
+				/*
+				 * Started and attached to our shmem.  Only the launcher
+				 * ever starts workers, so nobody else can have taken the
+				 * worker slot.
+				 */
+				if (worker->proc)
+					return true;
+				break;
+
+			case BGWH_STOPPED:
+				/* Never started, or died before attaching to our shmem. */
+				return false;
+
+			default:
+				break;
+		}
+
+		/*
+		 * A timeout is needed because the attach is generally not
+		 * announced to us through the latch.
+		 */
+		wake_events = WaitLatch(MyLatch,
+								WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+								1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+		if (wake_events & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		ResetLatch(MyLatch);
+	}
+
+	return false;
+}
+
+/*
+ * Look through the workers array for an attached worker that serves the
+ * given subscription id.  Returns NULL when there is none.
+ *
+ * The caller must hold LogicalRepWorkerLock.
+ */
+LogicalRepWorker *
+logicalrep_worker_find(Oid subid)
+{
+	int			slotno;
+
+	Assert(LWLockHeldByMe(LogicalRepWorkerLock));
+
+	for (slotno = 0; slotno < max_logical_replication_workers; slotno++)
+	{
+		LogicalRepWorker *candidate = &LogicalRepCtx->workers[slotno];
+
+		/* Skip entries for other subscriptions ... */
+		if (candidate->subid != subid)
+			continue;
+		/* ... entries nobody is attached to ... */
+		if (!candidate->proc)
+			continue;
+		/* ... and entries whose process is not a live backend. */
+		if (!IsBackendPid(candidate->proc->pid))
+			continue;
+
+		return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Start new apply background worker.
+ */
+void
+logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid)
+{
+	BackgroundWorker bgw;
+	BackgroundWorkerHandle *bgw_handle;
+	int			slot;
+	LogicalRepWorker *worker = NULL;
+
+	ereport(LOG,
+			(errmsg("starting logical replication worker for subscription \"%s\"",
+					subname)));
+
+	/* Report this after the initial starting message for consistency. */
+	if (max_replication_slots == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+				 errmsg("cannot start logical replication workers when max_replication_slots = 0")));
+
+	/*
+	 * We need to do the modification of the shared memory under lock so that
+	 * we have consistent view.
+	 */
+	LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+	/* Find unused worker slot. */
+	for (slot = 0; slot < max_logical_replication_workers; slot++)
+	{
+		if (!LogicalRepCtx->workers[slot].proc)
+		{
+			worker = &LogicalRepCtx->workers[slot];
+			break;
+		}
+	}
+
+	/* Bail if not found */
+	if (worker == NULL)
+	{
+		/*
+		 * A WARNING does not unwind, so the lock has to be released by hand
+		 * before returning; otherwise it would stay held by this process.
+		 */
+		LWLockRelease(LogicalRepWorkerLock);
+		ereport(WARNING,
+				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+				 errmsg("out of logical replication workers slots"),
+				 errhint("You might need to increase max_logical_replication_workers.")));
+		return;
+	}
+
+	/* Prepare the worker info. */
+	memset(worker, 0, sizeof(LogicalRepWorker));
+	worker->dbid = dbid;
+	worker->userid = userid;
+	worker->subid = subid;
+
+	LWLockRelease(LogicalRepWorkerLock);
+
+	/*
+	 * Register the new dynamic worker.  The struct is zeroed first so that
+	 * fields not assigned below do not carry stack garbage.
+	 */
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags =	BGWORKER_SHMEM_ACCESS |
+		BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	bgw.bgw_main = ApplyWorkerMain;
+	snprintf(bgw.bgw_name, BGW_MAXLEN,
+			 "logical replication worker for subscription %u", subid);
+
+	bgw.bgw_restart_time = BGW_NEVER_RESTART;
+	bgw.bgw_notify_pid = MyProcPid;
+	/* The worker finds its shared slot through this index. */
+	bgw.bgw_main_arg = Int32GetDatum(slot);
+
+	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+				 errmsg("out of background workers slots"),
+				 errhint("You might need to increase max_worker_processes.")));
+		return;
+	}
+
+	/* Now wait until it attaches. */
+	WaitForReplicationWorkerAttach(worker, bgw_handle);
+}
+
+/*
+ * Stop the logical replication worker and wait until it detaches from the
+ * slot.
+ *
+ * Does nothing when no worker is found for the subscription.
+ *
+ * The caller must hold LogicalRepLauncherLock to ensure that new workers are
+ * not being started during this function call.
+ */
+void
+logicalrep_worker_stop(Oid subid)
+{
+	LogicalRepWorker *worker;
+
+	Assert(LWLockHeldByMe(LogicalRepLauncherLock));
+
+	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+	worker = logicalrep_worker_find(subid);
+
+	/* No worker, nothing to do. */
+	if (!worker)
+	{
+		LWLockRelease(LogicalRepWorkerLock);
+		return;
+	}
+
+	/*
+	 * If we found worker but it does not have proc set it is starting up,
+	 * wait for it to finish and then kill it.
+	 */
+	while (worker && !worker->proc)
+	{
+		int	rc;
+
+		LWLockRelease(LogicalRepWorkerLock);
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Wait for signal. */
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   1000L, WAIT_EVENT_BGWORKER_STARTUP);
+
+		/* emergency bailout if postmaster has died */
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		ResetLatch(&MyProc->procLatch);
+
+		/* Check if the worker has started. */
+		LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+		worker = logicalrep_worker_find(subid);
+		if (!worker || worker->proc)
+			break;
+	}
+
+	/*
+	 * The loop above also ends when the worker is gone.  In that case there
+	 * is nothing left to signal, and dereferencing the NULL pointer below
+	 * would crash.  The lock is held on every path that reaches this point.
+	 */
+	if (!worker)
+	{
+		LWLockRelease(LogicalRepWorkerLock);
+		return;
+	}
+
+	/* Now terminate the worker ... */
+	kill(worker->proc->pid, SIGTERM);
+	LWLockRelease(LogicalRepWorkerLock);
+
+	/* ... and wait for it to die. */
+	for (;;)
+	{
+		int	rc;
+
+		/* The worker clears proc in its slot when it detaches. */
+		LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+		if (!worker->proc)
+		{
+			LWLockRelease(LogicalRepWorkerLock);
+			break;
+		}
+		LWLockRelease(LogicalRepWorkerLock);
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Wait for more work. */
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   1000L, WAIT_EVENT_BGWORKER_SHUTDOWN);
+
+		/* emergency bailout if postmaster has died */
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		ResetLatch(&MyProc->procLatch);
+	}
+}
+
+/*
+ * Attach to a slot.
+ *
+ * Makes the given entry of the shared workers array this process's
+ * MyLogicalRepWorker, records MyProc in it and registers the exit callback
+ * that releases it again.  Raises an error if the slot already has a proc.
+ */
+void
+logicalrep_worker_attach(int slot)
+{
+	/* Block concurrent access. */
+	LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+	Assert(slot >= 0 && slot < max_logical_replication_workers);
+	MyLogicalRepWorker = &LogicalRepCtx->workers[slot];
+
+	if (MyLogicalRepWorker->proc)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("logical replication worker slot %d already used by "
+						"another worker", slot)));
+
+	MyLogicalRepWorker->proc = MyProc;
+	before_shmem_exit(logicalrep_worker_onexit, (Datum) 0);
+
+	LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Detach the worker (cleans up the worker info).
+ *
+ * Clearing proc is what marks the slot as unused for the slot search in
+ * logicalrep_worker_launch().
+ */
+static void
+logicalrep_worker_detach(void)
+{
+	/* Block concurrent access. */
+	LWLockAcquire(LogicalRepWorkerLock, LW_EXCLUSIVE);
+
+	MyLogicalRepWorker->dbid = InvalidOid;
+	MyLogicalRepWorker->userid = InvalidOid;
+	MyLogicalRepWorker->subid = InvalidOid;
+	MyLogicalRepWorker->proc = NULL;
+
+	LWLockRelease(LogicalRepWorkerLock);
+}
+
+/*
+ * Cleanup function.
+ *
+ * Called on logical replication worker exit.
+ */
+static void
+logicalrep_worker_onexit(int code, Datum arg)
+{
+	logicalrep_worker_detach();
+}
+
+/* SIGTERM: set flag to exit at next convenient time */
+void
+logicalrep_worker_sigterm(SIGNAL_ARGS)
+{
+	/*
+	 * The handler can interrupt code that is about to inspect errno, and
+	 * SetLatch() may change it, so preserve the value across the handler.
+	 */
+	int			save_errno = errno;
+
+	got_SIGTERM = true;
+
+	/* Waken anything waiting on the process latch */
+	SetLatch(MyLatch);
+
+	errno = save_errno;
+}
+
+/*
+ * ApplyLauncherShmemSize
+ *		Compute space needed for replication launcher shared memory
+ */
+Size
+ApplyLauncherShmemSize(void)
+{
+	Size		size;
+
+	/*
+	 * Need the fixed struct and the array of LogicalRepWorker.
+	 */
+	size = sizeof(LogicalRepCtxStruct);
+	size = MAXALIGN(size);
+	size = add_size(size, mul_size(max_logical_replication_workers,
+								   sizeof(LogicalRepWorker)));
+	return size;
+}
+
+/*
+ * ApplyLauncherRegister
+ *		Register the launcher as a background worker.
+ *
+ * Nothing is registered when max_logical_replication_workers is 0.
+ */
+void
+ApplyLauncherRegister(void)
+{
+	BackgroundWorker bgw;
+
+	if (max_logical_replication_workers == 0)
+		return;
+
+	/* Zero the struct so fields not assigned below hold no stack garbage. */
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags =	BGWORKER_SHMEM_ACCESS |
+		BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	bgw.bgw_main = ApplyLauncherMain;
+	snprintf(bgw.bgw_name, BGW_MAXLEN,
+			 "logical replication launcher");
+	bgw.bgw_restart_time = 5;
+	bgw.bgw_notify_pid = 0;
+	bgw.bgw_main_arg = (Datum) 0;
+
+	RegisterBackgroundWorker(&bgw);
+}
+
+/*
+ * ApplyLauncherShmemInit
+ *		Allocate and initialize replication launcher shared memory
+ */
+void
+ApplyLauncherShmemInit(void)
+{
+	bool		found;
+
+	LogicalRepCtx = (LogicalRepCtxStruct *)
+		ShmemInitStruct("Logical Replication Launcher Data",
+						ApplyLauncherShmemSize(),
+						&found);
+
+	/* Only the process that creates the area zeroes it. */
+	if (!found)
+		memset(LogicalRepCtx, 0, ApplyLauncherShmemSize());
+}
+
+/*
+ * Wakeup the launcher on commit if requested.
+ *
+ * The request is consumed here, so one request wakes the launcher once
+ * rather than at every later commit of this backend.
+ * NOTE(review): a request made in a transaction that aborts is not cleared
+ * by this function; confirm whether an abort-time reset exists elsewhere.
+ */
+void
+AtCommit_ApplyLauncher(void)
+{
+	if (on_commit_laucher_wakeup)
+	{
+		on_commit_laucher_wakeup = false;
+		ApplyLauncherWakeup();
+	}
+}
+
+/*
+ * Request wakeup of the launcher on commit of the transaction.
+ *
+ * This is used to send launcher signal to stop sleeping and process the
+ * subscriptions when current transaction commits.
Should be used when new
+ * tuple was added to the pg_subscription catalog.
+*/
+void
+ApplyLauncherWakeupAtCommit(void)
+{
+	if (!on_commit_laucher_wakeup)
+		on_commit_laucher_wakeup = true;
+}
+
+/*
+ * Signal the launcher with SIGUSR1, provided the pid recorded in shared
+ * memory belongs to a live backend.
+ */
+void
+ApplyLauncherWakeup(void)
+{
+	if (IsBackendPid(LogicalRepCtx->launcher_pid))
+		kill(LogicalRepCtx->launcher_pid, SIGUSR1);
+}
+
+/*
+ * Main loop for the apply launcher process.
+ *
+ * Each cycle starts at most one worker for an enabled subscription that has
+ * none, then sleeps on the process latch.  The loop ends when SIGTERM has
+ * been received.
+ */
+void
+ApplyLauncherMain(Datum main_arg)
+{
+	/*
+	 * Time of the most recent worker start.  This has to live outside the
+	 * loop: if it were reset on every cycle, the retry-interval check below
+	 * would always succeed and worker starts would never be rate limited.
+	 */
+	TimestampTz last_start_time = 0;
+
+	ereport(LOG,
+			(errmsg("logical replication launcher started")));
+
+	/* Establish signal handlers. */
+	pqsignal(SIGTERM, logicalrep_worker_sigterm);
+	BackgroundWorkerUnblockSignals();
+
+	/* Make it easy to identify our processes. */
+	SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
+					PGC_USERSET, PGC_S_SESSION);
+
+	LogicalRepCtx->launcher_pid = MyProcPid;
+
+	/*
+	 * Establish connection to nailed catalogs (we only ever access
+	 * pg_subscription).
+	 */
+	BackgroundWorkerInitializeConnection(NULL, NULL);
+
+	/* Enter main loop */
+	while (!got_SIGTERM)
+	{
+		int					rc;
+		List			   *sublist;
+		ListCell		   *lc;
+		MemoryContext		subctx;
+		MemoryContext		oldctx;
+		TimestampTz			now;
+		long				wait_time = DEFAULT_NAPTIME_PER_CYCLE;
+
+		now = GetCurrentTimestamp();
+
+		/* Limit the start retry to once a wal_retrieve_retry_interval */
+		if (TimestampDifferenceExceeds(last_start_time, now,
+									   wal_retrieve_retry_interval))
+		{
+			/* Use temporary context for the database list and worker info. */
+			subctx = AllocSetContextCreate(TopMemoryContext,
+										   "Logical Replication Launcher sublist",
+										   ALLOCSET_DEFAULT_MINSIZE,
+										   ALLOCSET_DEFAULT_INITSIZE,
+										   ALLOCSET_DEFAULT_MAXSIZE);
+			oldctx = MemoryContextSwitchTo(subctx);
+
+			/* Block any concurrent DROP SUBSCRIPTION. */
+			LWLockAcquire(LogicalRepLauncherLock, LW_EXCLUSIVE);
+
+			/* search for subscriptions to start or stop. */
+			sublist = get_subscription_list();
+
+			/* Start the missing workers for enabled subscriptions. */
+			foreach(lc, sublist)
+			{
+				Subscription	   *sub = (Subscription *) lfirst(lc);
+				LogicalRepWorker   *w;
+
+				LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+				w = logicalrep_worker_find(sub->oid);
+				LWLockRelease(LogicalRepWorkerLock);
+
+				if (sub->enabled && w == NULL)
+				{
+					logicalrep_worker_launch(sub->dbid, sub->oid, sub->name, sub->owner);
+					last_start_time = now;
+					wait_time = wal_retrieve_retry_interval;
+					/* Limit to one worker per mainloop cycle. */
+					break;
+				}
+			}
+
+			LWLockRelease(LogicalRepLauncherLock);
+
+			/* Switch back to original memory context. */
+			MemoryContextSwitchTo(oldctx);
+			/* Clean the temporary memory. */
+			MemoryContextDelete(subctx);
+		}
+		else
+		{
+			/*
+			 * The wait in previous cycle was interrupted in less than
+			 * wal_retrieve_retry_interval since last worker was started,
+			 * this usually means crash of the worker, so we should retry
+			 * in wal_retrieve_retry_interval again.
+			 */
+			wait_time = wal_retrieve_retry_interval;
+		}
+
+		/* Wait for more work. */
+		rc = WaitLatch(&MyProc->procLatch,
+					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+					   wait_time,
+					   WAIT_EVENT_LOGICAL_LAUNCHER_MAIN);
+
+		/* emergency bailout if postmaster has died */
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		ResetLatch(&MyProc->procLatch);
+	}
+
+	LogicalRepCtx->launcher_pid = 0;
+
+	/* ... and if it returns, we're done */
+	ereport(LOG,
+			(errmsg("logical replication launcher shutting down")));
+
+	proc_exit(0);
+}
+
+/*
+ * Returns state of the subscriptions.
+ *
+ * One row is produced per attached worker.  When the first argument is not
+ * NULL, only the worker of that subscription is reported.
+ */
+Datum
+pg_stat_get_subscription(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_SUBSCRIPTION_COLS	7
+	Oid			subid = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
+	int			i;
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not " \
+						"allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/* Make sure we get consistent view of the workers. */
+	LWLockAcquire(LogicalRepWorkerLock, LW_SHARED);
+
+	/*
+	 * The workers array has exactly max_logical_replication_workers entries
+	 * (see ApplyLauncherShmemSize), so the last valid index is one less.
+	 */
+	for (i = 0; i < max_logical_replication_workers; i++)
+	{
+		/* for each row */
+		Datum		values[PG_STAT_GET_SUBSCRIPTION_COLS];
+		bool		nulls[PG_STAT_GET_SUBSCRIPTION_COLS];
+		int			worker_pid;
+		LogicalRepWorker	worker;
+
+		/* Work on a private copy of the shared entry. */
+		memcpy(&worker, &LogicalRepCtx->workers[i],
+			   sizeof(LogicalRepWorker));
+		if (!worker.proc || !IsBackendPid(worker.proc->pid))
+			continue;
+
+		if (OidIsValid(subid) && worker.subid != subid)
+			continue;
+
+		worker_pid = worker.proc->pid;
+
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		/* Unset LSNs and zero timestamps are reported as NULL. */
+		values[0] = ObjectIdGetDatum(worker.subid);
+		values[1] = Int32GetDatum(worker_pid);
+		if (XLogRecPtrIsInvalid(worker.last_lsn))
+			nulls[2] = true;
+		else
+			values[2] = LSNGetDatum(worker.last_lsn);
+		if (worker.last_send_time == 0)
+			nulls[3] = true;
+		else
+			values[3] = TimestampTzGetDatum(worker.last_send_time);
+		if (worker.last_recv_time == 0)
+			nulls[4] = true;
+		else
+			values[4] = TimestampTzGetDatum(worker.last_recv_time);
+		if (XLogRecPtrIsInvalid(worker.reply_lsn))
+			nulls[5] = true;
+		else
+			values[5] = LSNGetDatum(worker.reply_lsn);
+		if (worker.reply_time == 0)
+			nulls[6] = true;
+		else
+			values[6] = TimestampTzGetDatum(worker.reply_time);
+
+		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+
+		/* If only a single subscription was requested, and we found it, break. */
+		if (OidIsValid(subid))
+			break;
+	}
+
+	LWLockRelease(LogicalRepWorkerLock);
+
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	return (Datum) 0;
+}
diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c
new file mode 100644
index 0000000000..1f30de606a
--- /dev/null
+++ b/src/backend/replication/logical/proto.c
@@ -0,0 +1,637 @@
+/*-------------------------------------------------------------------------
+ *
+ * proto.c
+ *		logical replication protocol functions
+ *
+ * Copyright (c) 2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/backend/replication/logical/proto.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/sysattr.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_type.h"
+#include "libpq/pqformat.h"
+#include "replication/logicalproto.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+/*
+ * Protocol message flags.
+ */
+#define LOGICALREP_IS_REPLICA_IDENTITY 1
+
+static void logicalrep_write_attrs(StringInfo out, Relation rel);
+static void logicalrep_write_tuple(StringInfo out, Relation rel,
+								   HeapTuple tuple);
+
+static void logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel);
+static void logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple);
+
+static void logicalrep_write_namespace(StringInfo out, Oid nspid);
+static const char *logicalrep_read_namespace(StringInfo in);
+
+/*
+ * Write BEGIN to the output stream.
+ *
+ * Layout: 'B', final LSN (int64), commit time (int64), xid (4 bytes).
+ */
+void
+logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn)
+{
+	pq_sendbyte(out, 'B');		/* BEGIN */
+
+	/* fixed fields */
+	pq_sendint64(out, txn->final_lsn);
+	pq_sendint64(out, txn->commit_time);
+	pq_sendint(out, txn->xid, 4);
+}
+
+/*
+ * Read transaction BEGIN from the stream.
+ */
+void
+logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data)
+{
+	/* The final LSN comes first and must be a valid one. */
+	begin_data->final_lsn = pq_getmsgint64(in);
+	if (XLogRecPtrIsInvalid(begin_data->final_lsn))
+		elog(ERROR, "final_lsn not set in begin message");
+
+	/* Then the commit time and the 4-byte transaction id. */
+	begin_data->committime = pq_getmsgint64(in);
+	begin_data->xid = pq_getmsgint(in, 4);
+}
+
+
+/*
+ * Write COMMIT to the output stream.
+ *
+ * Layout: 'C', flags byte, commit LSN, end LSN, commit time (int64 each).
+ */
+void
+logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn,
+						XLogRecPtr commit_lsn)
+{
+	/* no flag is defined yet, so the flags byte is always zero */
+	const uint8 no_flags = 0;
+
+	pq_sendbyte(out, 'C');		/* message type: COMMIT */
+	pq_sendbyte(out, no_flags);
+
+	pq_sendint64(out, commit_lsn);
+	pq_sendint64(out, txn->end_lsn);
+	pq_sendint64(out, txn->commit_time);
+}
+
+/*
+ * Read transaction COMMIT from the stream.
+ *
+ * Any non-zero flags byte is rejected, since no flag is defined yet.
+ */
+void
+logicalrep_read_commit(StringInfo in, LogicalRepCommitData *commit_data)
+{
+	uint8		flags;
+
+	flags = pq_getmsgbyte(in);
+	if (flags != 0)
+		elog(ERROR, "unknown flags %u in commit message", flags);
+
+	/* fields follow in the order logicalrep_write_commit() emits them */
+	commit_data->commit_lsn = pq_getmsgint64(in);
+	commit_data->end_lsn = pq_getmsgint64(in);
+	commit_data->committime = pq_getmsgint64(in);
+}
+
+/*
+ * Write ORIGIN to the output stream.
+ *
+ * Layout: 'O', origin LSN (int64), origin name as a string.
+ */
+void
+logicalrep_write_origin(StringInfo out, const char *origin,
+						XLogRecPtr origin_lsn)
+{
+	pq_sendbyte(out, 'O');		/* message type: ORIGIN */
+	pq_sendint64(out, origin_lsn);
+	pq_sendstring(out, origin);
+}
+
+/*
+ * Read ORIGIN from the stream.
+ *
+ * Stores the LSN in *origin_lsn and returns a palloc'd copy of the name.
+ */
+char *
+logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn)
+{
+	const char *origin_name;
+
+	*origin_lsn = pq_getmsgint64(in);
+	origin_name = pq_getmsgstring(in);
+
+	return pstrdup(origin_name);
+}
+
+/*
+ * Write INSERT to the output stream.
+ */
+void
+logicalrep_write_insert(StringInfo out, Relation rel, HeapTuple newtuple)
+{
+	pq_sendbyte(out, 'I');		/* message type: INSERT */
+
+	Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+	/* the relation is identified by its Oid */
+	pq_sendint(out, RelationGetRelid(rel), 4);
+
+	/* 'N' announces the new tuple, which follows immediately */
+	pq_sendbyte(out, 'N');
+	logicalrep_write_tuple(out, rel, newtuple);
+}
+
+/*
+ * Read INSERT from stream.
+ *
+ * Fills the new tuple and returns the relation id from the message.
+ */
+LogicalRepRelId
+logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup)
+{
+	LogicalRepRelId remote_relid;
+	char		marker;
+
+	remote_relid = pq_getmsgint(in, 4);
+
+	/* only a new tuple ('N') may follow the relation id */
+	marker = pq_getmsgbyte(in);
+	if (marker != 'N')
+		elog(ERROR, "expected new tuple but got %d",
+			 marker);
+
+	logicalrep_read_tuple(in, newtup);
+
+	return remote_relid;
+}
+
+/*
+ * Write UPDATE to the output stream.
+ *
+ * The old tuple is optional.  When present it is tagged 'O' for relations
+ * with REPLICA IDENTITY FULL and 'K' otherwise.
+ */
+void
+logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple,
+						HeapTuple newtuple)
+{
+	pq_sendbyte(out, 'U');		/* message type: UPDATE */
+
+	Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+	/* the relation is identified by its Oid */
+	pq_sendint(out, RelationGetRelid(rel), 4);
+
+	if (oldtuple != NULL)
+	{
+		bool		full_identity;
+
+		full_identity = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
+
+		/* 'O': old tuple follows, 'K': old key follows */
+		pq_sendbyte(out, full_identity ? 'O' : 'K');
+		logicalrep_write_tuple(out, rel, oldtuple);
+	}
+
+	/* 'N' announces the new tuple, which always comes last */
+	pq_sendbyte(out, 'N');
+	logicalrep_write_tuple(out, rel, newtuple);
+}
+
+/*
+ * Read UPDATE from stream.
+ */
+LogicalRepRelId
+logicalrep_read_update(StringInfo in, bool *has_oldtuple,
+					   LogicalRepTupleData *oldtup,
+					   LogicalRepTupleData *newtup)
+{
+	LogicalRepRelId remote_relid;
+	char		marker;
+
+	remote_relid = pq_getmsgint(in, 4);
+
+	/* The first marker is either an old tuple/key or the new tuple. */
+	marker = pq_getmsgbyte(in);
+	switch (marker)
+	{
+		case 'K':
+		case 'O':
+			/* old tuple or key comes first; another marker follows it */
+			logicalrep_read_tuple(in, oldtup);
+			*has_oldtuple = true;
+			marker = pq_getmsgbyte(in);
+			break;
+
+		case 'N':
+			*has_oldtuple = false;
+			break;
+
+		default:
+			elog(ERROR, "expected action 'N', 'O' or 'K', got %c",
+				 marker);
+			break;
+	}
+
+	/* Whatever came before, the new tuple has to be next. */
+	if (marker != 'N')
+		elog(ERROR, "expected action 'N', got %c",
+			 marker);
+
+	logicalrep_read_tuple(in, newtup);
+
+	return remote_relid;
+}
+
+/*
+ * Write DELETE to the output stream.
+ *
+ * The old tuple is tagged 'O' for relations with REPLICA IDENTITY FULL and
+ * 'K' otherwise.
+ */
+void
+logicalrep_write_delete(StringInfo out, Relation rel, HeapTuple oldtuple)
+{
+	char		old_marker;
+
+	Assert(rel->rd_rel->relreplident == REPLICA_IDENTITY_DEFAULT ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL ||
+		   rel->rd_rel->relreplident == REPLICA_IDENTITY_INDEX);
+
+	pq_sendbyte(out, 'D');		/* message type: DELETE */
+
+	/* the relation is identified by its Oid */
+	pq_sendint(out, RelationGetRelid(rel), 4);
+
+	/* 'O': old tuple follows, 'K': old key follows */
+	if (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL)
+		old_marker = 'O';
+	else
+		old_marker = 'K';
+	pq_sendbyte(out, old_marker);
+
+	logicalrep_write_tuple(out, rel, oldtuple);
+}
+
+/*
+ * Read DELETE from stream.
+ *
+ * Fills the old tuple.
+ */
+LogicalRepRelId
+logicalrep_read_delete(StringInfo in, LogicalRepTupleData *oldtup)
+{
+	LogicalRepRelId remote_relid;
+	char		marker;
+
+	remote_relid = pq_getmsgint(in, 4);
+
+	/* a DELETE carries either the old tuple ('O') or the old key ('K') */
+	marker = pq_getmsgbyte(in);
+	if (!(marker == 'K' || marker == 'O'))
+		elog(ERROR, "expected action 'O' or 'K', got %c", marker);
+
+	logicalrep_read_tuple(in, oldtup);
+
+	return remote_relid;
+}
+
+/*
+ * Write relation description to the output stream.
+ *
+ * Layout: 'R', relation Oid (4 bytes), namespace, relation name, replica
+ * identity byte, attribute list.
+ */
+void
+logicalrep_write_rel(StringInfo out, Relation rel)
+{
+	pq_sendbyte(out, 'R');		/* message type: RELATION */
+
+	/* the relation is identified by its Oid */
+	pq_sendint(out, RelationGetRelid(rel), 4);
+
+	/* qualified name: namespace first, then the relation name */
+	logicalrep_write_namespace(out, RelationGetNamespace(rel));
+	pq_sendstring(out, RelationGetRelationName(rel));
+
+	/* replica identity setting */
+	pq_sendbyte(out, rel->rd_rel->relreplident);
+
+	/* attribute descriptions */
+	logicalrep_write_attrs(out, rel);
+}
+
+/*
+ * Read the relation info from stream and return as LogicalRepRelation.
+ *
+ * The struct and the names stored in it are palloc'd.
+ */
+LogicalRepRelation *
+logicalrep_read_rel(StringInfo in)
+{
+	LogicalRepRelation *remote_rel;
+
+	remote_rel = palloc(sizeof(LogicalRepRelation));
+
+	remote_rel->remoteid = pq_getmsgint(in, 4);
+
+	/* qualified name, copied out of the message buffer */
+	remote_rel->nspname = pstrdup(logicalrep_read_namespace(in));
+	remote_rel->relname = pstrdup(pq_getmsgstring(in));
+
+	/* replica identity setting */
+	remote_rel->replident = pq_getmsgbyte(in);
+
+	/* attribute descriptions */
+	logicalrep_read_attrs(in, remote_rel);
+
+	return remote_rel;
+}
+
+/*
+ * Write type info to the output stream.
+ *
+ * This function will always write base type info.
+ */
+void
+logicalrep_write_typ(StringInfo out, Oid typoid)
+{
+	Oid			base_oid = getBaseType(typoid);
+	HeapTuple	typ_tuple;
+	Form_pg_type typ_form;
+
+	pq_sendbyte(out, 'Y');		/* message type: TYPE */
+
+	/* the name that gets sent is looked up for the base type */
+	typ_tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(base_oid));
+	if (!HeapTupleIsValid(typ_tuple))
+		elog(ERROR, "cache lookup failed for type %u", base_oid);
+	typ_form = (Form_pg_type) GETSTRUCT(typ_tuple);
+
+	/* the identifier on the wire is the Oid passed in by the caller */
+	pq_sendint(out, typoid, 4);
+
+	/* qualified type name: namespace first, then the type name */
+	logicalrep_write_namespace(out, typ_form->typnamespace);
+	pq_sendstring(out, NameStr(typ_form->typname));
+
+	ReleaseSysCache(typ_tuple);
+}
+
+/*
+ * Read type info from the stream.
+ *
+ * The names stored in *ltyp are palloc'd copies.
+ */
+void
+logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp)
+{
+	const char *nsp;
+	const char *typ;
+
+	ltyp->remoteid = pq_getmsgint(in, 4);
+
+	/* qualified type name, copied out of the message buffer */
+	nsp = logicalrep_read_namespace(in);
+	ltyp->nspname = pstrdup(nsp);
+	typ = pq_getmsgstring(in);
+	ltyp->typname = pstrdup(typ);
+}
+
+/*
+ * Write a tuple to the outputstream, in the most efficient format possible.
+ */
+static void
+logicalrep_write_tuple(StringInfo out, Relation rel, HeapTuple tuple)
+{
+	TupleDesc	desc;
+	Datum		values[MaxTupleAttributeNumber];
+	bool		isnull[MaxTupleAttributeNumber];
+	int			i;
+	uint16		nliveatts = 0;
+
+	desc = RelationGetDescr(rel);
+
+	/* Count the columns that are not dropped; only those are sent. */
+	for (i = 0; i < desc->natts; i++)
+	{
+		if (desc->attrs[i]->attisdropped)
+			continue;
+		nliveatts++;
+	}
+	pq_sendint(out, nliveatts, 2);
+
+	/*
+	 * try to allocate enough memory from the get-go: the tuple length plus,
+	 * per live column, one kind byte and a 4-byte length word
+	 */
+	enlargeStringInfo(out, tuple->t_len +
+					  nliveatts * (1 + 4));
+
+	heap_deform_tuple(tuple, desc, values, isnull);
+
+	/* Write the values */
+	for (i = 0; i < desc->natts; i++)
+	{
+		HeapTuple	typtup;
+		Form_pg_type typclass;
+		Form_pg_attribute att = desc->attrs[i];
+		char   	   *outputstr;
+		int			len;
+
+		/* skip dropped columns */
+		if (att->attisdropped)
+			continue;
+
+		if (isnull[i])
+		{
+			pq_sendbyte(out, 'n');	/* null column */
+			continue;
+		}
+		else if (att->attlen == -1 && VARATT_IS_EXTERNAL_ONDISK(values[i]))
+		{
+			pq_sendbyte(out, 'u');	/* unchanged toast column */
+			continue;
+		}
+
+		/* Look up the column type to find its output function. */
+		typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(att->atttypid));
+		if (!HeapTupleIsValid(typtup))
+			elog(ERROR, "cache lookup failed for type %u", att->atttypid);
+		typclass = (Form_pg_type) GETSTRUCT(typtup);
+
+		pq_sendbyte(out, 't');	/* 'text' data follows */
+
+		/* The value is sent in text form, including its terminating NUL. */
+		outputstr = OidOutputFunctionCall(typclass->typoutput, values[i]);
+		len = strlen(outputstr) + 1;	/* null terminated */
+		pq_sendint(out, len, 4);		/* length */
+		appendBinaryStringInfo(out, outputstr, len); /* data */
+
+		pfree(outputstr);
+
+		ReleaseSysCache(typtup);
+	}
+}
+
+/*
+ * Read tuple in remote format from stream.
+ *
+ * The returned tuple points into the input stringinfo.
+ */
+static void
+logicalrep_read_tuple(StringInfo in, LogicalRepTupleData *tuple)
+{
+	int			i;
+	int			natts;
+
+	/*
+	 * Get number of attributes.
+	 * NOTE(review): natts comes from the message and is not checked here
+	 * against the capacity of tuple->values / tuple->changed -- confirm
+	 * against the LogicalRepTupleData definition.
+	 */
+	natts = pq_getmsgint(in, 2);
+
+	/* Start with every column marked as not changed. */
+	memset(tuple->changed, 0, sizeof(tuple->changed));
+
+	/* Read the data */
+	for (i = 0; i < natts; i++)
+	{
+		char		kind;
+		int			len;
+
+		kind = pq_getmsgbyte(in);
+
+		switch (kind)
+		{
+			case 'n': /* null */
+				tuple->values[i] = NULL;
+				tuple->changed[i] = true;
+				break;
+			case 'u': /* unchanged column */
+				tuple->values[i] = (char *) 0xdeadbeef; /* make bad usage more obvious */
+				break;
+			case 't': /* text formatted value */
+				{
+					tuple->changed[i] = true;
+
+					len = pq_getmsgint(in, 4); /* read length */
+
+					/* and data */
+					tuple->values[i] = (char *) pq_getmsgbytes(in, len);
+				}
+				break;
+			default:
+				elog(ERROR, "unknown data representation type '%c'", kind);
+		}
+	}
+}
+
+/*
+ * Write relation attributes to the stream.
+ *
+ * For each live column: a flags byte, the column name, the type Oid and the
+ * type modifier.
+ */
+static void
+logicalrep_write_attrs(StringInfo out, Relation rel)
+{
+	TupleDesc	desc;
+	int			i;
+	uint16		nliveatts = 0;
+	Bitmapset  *idattrs = NULL;
+	bool		replidentfull;
+
+	desc = RelationGetDescr(rel);
+
+	/* send number of live attributes */
+	for (i = 0; i < desc->natts; i++)
+	{
+		if (desc->attrs[i]->attisdropped)
+			continue;
+		nliveatts++;
+	}
+	pq_sendint(out, nliveatts, 2);
+
+	/*
+	 * fetch bitmap of REPLICA IDENTITY attributes; not needed for REPLICA
+	 * IDENTITY FULL, where every column gets the flag anyway
+	 */
+	replidentfull = (rel->rd_rel->relreplident == REPLICA_IDENTITY_FULL);
+	if (!replidentfull)
+		idattrs = RelationGetIndexAttrBitmap(rel,
+											 INDEX_ATTR_BITMAP_IDENTITY_KEY);
+
+	/* send the attributes */
+	for (i = 0; i < desc->natts; i++)
+	{
+		Form_pg_attribute att = desc->attrs[i];
+		uint8			flags = 0;
+
+		if (att->attisdropped)
+			continue;
+
+		/* REPLICA IDENTITY FULL means all columns are sent as part of key. */
+		if (replidentfull ||
+			bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber,
+						  idattrs))
+			flags |= LOGICALREP_IS_REPLICA_IDENTITY;
+
+		pq_sendbyte(out, flags);
+
+		/* attribute name */
+		pq_sendstring(out, NameStr(att->attname));
+
+		/* attribute type id */
+		pq_sendint(out, (int) att->atttypid, sizeof(att->atttypid));
+
+		/* attribute type modifier */
+		pq_sendint(out, att->atttypmod, sizeof(att->atttypmod));
+	}
+
+	bms_free(idattrs);
+}
+
+/*
+ * Read relation attribute names from the stream.
+ *
+ * Fills attnames, atttyps, attkeys and natts of *rel.  Members of attkeys
+ * are positions in the received attribute list.
+ */
+static void
+logicalrep_read_attrs(StringInfo in, LogicalRepRelation *rel)
+{
+	int			i;
+	int			natts;
+	char	  **attnames;
+	Oid		   *atttyps;
+	Bitmapset  *attkeys = NULL;
+
+	natts = pq_getmsgint(in, 2);
+	attnames = palloc(natts * sizeof(char *));
+	atttyps = palloc(natts * sizeof(Oid));
+
+	/* read the attributes */
+	for (i = 0; i < natts; i++)
+	{
+		uint8		flags;
+
+		/* Check for replica identity column */
+		flags = pq_getmsgbyte(in);
+		if (flags & LOGICALREP_IS_REPLICA_IDENTITY)
+			attkeys = bms_add_member(attkeys, i);
+
+		/* attribute name */
+		attnames[i] = pstrdup(pq_getmsgstring(in));
+
+		/* attribute type id */
+		atttyps[i] = (Oid) pq_getmsgint(in, 4);
+
+		/* we ignore the attribute type modifier for now */
+		(void) pq_getmsgint(in, 4);
+	}
+
+	rel->attnames = attnames;
+	rel->atttyps = atttyps;
+	rel->attkeys = attkeys;
+	rel->natts = natts;
+}
+
+/*
+ * Write the namespace name or empty string for pg_catalog (to save space).
+ */
+static void
+logicalrep_write_namespace(StringInfo out, Oid nspid)
+{
+	/* A lone NUL byte is the empty string that stands for pg_catalog. */
+	if (nspid == PG_CATALOG_NAMESPACE)
+		pq_sendbyte(out, '\0');
+	else
+	{
+		char *nspname = get_namespace_name(nspid);
+
+		if (nspname == NULL)
+			elog(ERROR, "cache lookup failed for namespace %u",
+				 nspid);
+
+		pq_sendstring(out, nspname);
+	}
+}
+
+/*
+ * Read the namespace name while treating empty string as pg_catalog.
+ */
+static const char *
+logicalrep_read_namespace(StringInfo in)
+{
+    const char *nspname = pq_getmsgstring(in);
+
+    if (nspname[0] == '\0')
+        nspname = "pg_catalog";
+
+    return nspname;
+}
diff --git a/src/backend/replication/logical/relation.c b/src/backend/replication/logical/relation.c
new file mode 100644
index 0000000000..383c6ebe76
--- /dev/null
+++ b/src/backend/replication/logical/relation.c
@@ -0,0 +1,489 @@
+/*-------------------------------------------------------------------------
+ * relation.c
+ *     PostgreSQL logical replication
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    src/backend/replication/logical/relation.c
+ *
+ * NOTES
+ *    This file contains helper functions for logical replication relation
+ *    mapping cache.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/sysattr.h"
+#include "catalog/namespace.h"
+#include "nodes/makefuncs.h"
+#include "replication/logicalrelation.h"
+#include "replication/worker_internal.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+static MemoryContext LogicalRepRelMapContext = NULL;
+
+static HTAB *LogicalRepRelMap = NULL;
+static HTAB *LogicalRepTypMap = NULL;
+
+static void logicalrep_typmap_invalidate_cb(Datum arg, int cacheid,
+                                uint32 hashvalue);
+
+/*
+ * Relcache invalidation callback for our relation map cache.
+ *
+ * An entry is invalidated by resetting its localreloid to InvalidOid; the
+ * remote relation description stored in the entry is left alone.  A valid
+ * reloid invalidates the first entry mapped to that local relation, while
+ * InvalidOid invalidates every entry.
+ */
+static void
+logicalrep_relmap_invalidate_cb(Datum arg, Oid reloid)
+{
+    LogicalRepRelMapEntry *entry;
+
+    /* Just to be sure. */
+    if (LogicalRepRelMap == NULL)
+        return;
+
+    if (reloid != InvalidOid)
+    {
+        HASH_SEQ_STATUS status;
+
+        hash_seq_init(&status, LogicalRepRelMap);
+
+        /* TODO, use inverse lookup hashtable? */
+        while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
+        {
+            if (entry->localreloid == reloid)
+            {
+                entry->localreloid = InvalidOid;
+                hash_seq_term(&status);     /* scan is abandoned early */
+                break;
+            }
+        }
+    }
+    else
+    {
+        /* invalidate all cache entries */
+        HASH_SEQ_STATUS status;
+
+        hash_seq_init(&status, LogicalRepRelMap);
+
+        while ((entry = (LogicalRepRelMapEntry *) hash_seq_search(&status)) != NULL)
+            entry->localreloid = InvalidOid;
+    }
+}
+
+/*
+ * Initialize the relation map cache.
+ *
+ * Creates both the relation map and the type map hash tables inside
+ * LogicalRepRelMapContext (a child of CacheMemoryContext, created here on
+ * first use) and registers the invalidation callbacks for them.
+ */
+static void
+logicalrep_relmap_init()
+{
+    HASHCTL     ctl;
+
+    if (!LogicalRepRelMapContext)
+        LogicalRepRelMapContext =
+            AllocSetContextCreate(CacheMemoryContext,
+                                  "LogicalRepRelMapContext",
+                                  ALLOCSET_DEFAULT_SIZES);
+
+    /* Initialize the relation hash table, keyed by remote relation id. */
+    MemSet(&ctl, 0, sizeof(ctl));
+    ctl.keysize = sizeof(LogicalRepRelId);
+    ctl.entrysize = sizeof(LogicalRepRelMapEntry);
+    ctl.hcxt = LogicalRepRelMapContext;
+
+    LogicalRepRelMap = hash_create("logicalrep relation map cache", 128, &ctl,
+                                   HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+    /* Initialize the type hash table, keyed by remote type Oid. */
+    MemSet(&ctl, 0, sizeof(ctl));
+    ctl.keysize = sizeof(Oid);
+    ctl.entrysize = sizeof(LogicalRepTyp);
+    ctl.hcxt = LogicalRepRelMapContext;
+
+    /* This will usually be small. */
+    LogicalRepTypMap = hash_create("logicalrep type map cache", 2, &ctl,
+                                   HASH_ELEM | HASH_BLOBS |HASH_CONTEXT);
+
+    /* Watch for invalidation events. */
+    CacheRegisterRelcacheCallback(logicalrep_relmap_invalidate_cb,
+                                  (Datum) 0);
+    CacheRegisterSyscacheCallback(TYPEOID, logicalrep_typmap_invalidate_cb,
+                                  (Datum) 0);
+}
+
+/*
+ * Free the entry of a relation map cache.
+ */ +static void +logicalrep_relmap_free_entry(LogicalRepRelMapEntry *entry) +{ + LogicalRepRelation *remoterel; + + remoterel = &entry->remoterel; + + pfree(remoterel->nspname); + pfree(remoterel->relname); + + if (remoterel->natts > 0) + { + int i; + + for (i = 0; i < remoterel->natts; i++) + pfree(remoterel->attnames[i]); + + pfree(remoterel->attnames); + pfree(remoterel->atttyps); + } + remoterel->attnames = NULL; + remoterel->atttyps = NULL; + + bms_free(remoterel->attkeys); + remoterel->attkeys = NULL; + + if (entry->attrmap) + pfree(entry->attrmap); + + entry->attrmap = NULL; + remoterel->natts = 0; + entry->localreloid = InvalidOid; + entry->localrel = NULL; +} + +/* + * Add new entry or update existing entry in the relation map cache. + * + * Called when new relation mapping is sent by the publisher to update + * our expected view of incoming data from said publisher. + */ +void +logicalrep_relmap_update(LogicalRepRelation *remoterel) +{ + MemoryContext oldctx; + LogicalRepRelMapEntry *entry; + bool found; + int i; + + if (LogicalRepRelMap == NULL) + logicalrep_relmap_init(); + + /* + * HASH_ENTER returns the existing entry if present or creates a new one. 
+ */ + entry = hash_search(LogicalRepRelMap, (void *) &remoterel->remoteid, + HASH_ENTER, &found); + + if (found) + logicalrep_relmap_free_entry(entry); + + /* Make cached copy of the data */ + oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext); + entry->remoterel.remoteid = remoterel->remoteid; + entry->remoterel.nspname = pstrdup(remoterel->nspname); + entry->remoterel.relname = pstrdup(remoterel->relname); + entry->remoterel.natts = remoterel->natts; + entry->remoterel.attnames = palloc(remoterel->natts * sizeof(char *)); + entry->remoterel.atttyps = palloc(remoterel->natts * sizeof(Oid)); + for (i = 0; i < remoterel->natts; i++) + { + entry->remoterel.attnames[i] = pstrdup(remoterel->attnames[i]); + entry->remoterel.atttyps[i] = remoterel->atttyps[i]; + } + entry->remoterel.replident = remoterel->replident; + entry->remoterel.attkeys = bms_copy(remoterel->attkeys); + entry->attrmap = NULL; + entry->localreloid = InvalidOid; + MemoryContextSwitchTo(oldctx); +} + +/* + * Find attribute index in TupleDesc struct by attribute name. + * + * Returns -1 if not found. + */ +static int +logicalrep_rel_att_by_name(LogicalRepRelation *remoterel, const char *attname) +{ + int i; + + for (i = 0; i < remoterel->natts; i++) + { + if (strcmp(remoterel->attnames[i], attname) == 0) + return i; + } + + return -1; +} + +/* + * Open the local relation associated with the remote one. + * + * Optionally rebuilds the Relcache mapping if it was invalidated + * by local DDL. + */ +LogicalRepRelMapEntry * +logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode) +{ + LogicalRepRelMapEntry *entry; + bool found; + + if (LogicalRepRelMap == NULL) + logicalrep_relmap_init(); + + /* Search for existing entry. */ + entry = hash_search(LogicalRepRelMap, (void *) &remoteid, + HASH_FIND, &found); + + if (!found) + elog(ERROR, "no relation map entry for remote relation ID %u", + remoteid); + + /* Need to update the local cache? 
*/ + if (!OidIsValid(entry->localreloid)) + { + Oid relid; + int i; + int found; + Bitmapset *idkey; + TupleDesc desc; + LogicalRepRelation *remoterel; + MemoryContext oldctx; + remoterel = &entry->remoterel; + + /* Try to find and lock the relation by name. */ + relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname, + remoterel->relname, -1), + lockmode, true); + if (!OidIsValid(relid)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication target relation \"%s.%s\" does not exist", + remoterel->nspname, remoterel->relname))); + entry->localrel = heap_open(relid, NoLock); + + /* + * We currently only support writing to regular and partitioned + * tables. + */ + if (entry->localrel->rd_rel->relkind != RELKIND_RELATION) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("logical replication target relation \"%s.%s\" is not a table", + remoterel->nspname, remoterel->relname))); + + /* + * Build the mapping of local attribute numbers to remote attribute + * numbers and validate that we don't miss any replicated columns + * as that would result in potentially unwanted data loss. + */ + desc = RelationGetDescr(entry->localrel); + oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext); + entry->attrmap = palloc(desc->natts * sizeof(int)); + MemoryContextSwitchTo(oldctx); + + found = 0; + for (i = 0; i < desc->natts; i++) + { + int attnum = logicalrep_rel_att_by_name(remoterel, + NameStr(desc->attrs[i]->attname)); + entry->attrmap[i] = attnum; + if (attnum >= 0) + found++; + } + + /* TODO, detail message with names of missing columns */ + if (found < remoterel->natts) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication target relation \"%s.%s\" is missing " + "some replicated columns", + remoterel->nspname, remoterel->relname))); + + /* + * Check that replica identity matches. 
We allow for stricter replica + * identity (fewer columns) on subscriber as that will not stop us + * from finding unique tuple. IE, if publisher has identity + * (id,timestamp) and subscriber just (id) this will not be a problem, + * but in the opposite scenario it will. + * + * Don't throw any error here just mark the relation entry as not + * updatable, as replica identity is only for updates and deletes + * but inserts can be replicated even without it. + */ + entry->updatable = true; + idkey = RelationGetIndexAttrBitmap(entry->localrel, + INDEX_ATTR_BITMAP_IDENTITY_KEY); + /* fallback to PK if no replica identity */ + if (idkey == NULL) + { + idkey = RelationGetIndexAttrBitmap(entry->localrel, + INDEX_ATTR_BITMAP_PRIMARY_KEY); + /* + * If no replica identity index and no PK, the published table + * must have replica identity FULL. + */ + if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL) + entry->updatable = false; + } + + i = -1; + while ((i = bms_next_member(idkey, i)) >= 0) + { + int attnum = i + FirstLowInvalidHeapAttributeNumber; + + if (!AttrNumberIsForUserDefinedAttr(attnum)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("logical replication target relation \"%s.%s\" uses " + "system columns in REPLICA IDENTITY index", + remoterel->nspname, remoterel->relname))); + + attnum = AttrNumberGetAttrOffset(attnum); + + if (!bms_is_member(entry->attrmap[attnum], remoterel->attkeys)) + { + entry->updatable = false; + break; + } + } + + entry->localreloid = relid; + } + else + entry->localrel = heap_open(entry->localreloid, lockmode); + + return entry; +} + +/* + * Close the previously opened logical relation. + */ +void +logicalrep_rel_close(LogicalRepRelMapEntry *rel, LOCKMODE lockmode) +{ + heap_close(rel->localrel, lockmode); + rel->localrel = NULL; +} + + +/* + * Type cache invalidation callback for our type map cache. 
+ */
+static void
+logicalrep_typmap_invalidate_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+    HASH_SEQ_STATUS status;
+    LogicalRepTyp *entry;
+
+    /* Just to be sure. */
+    if (LogicalRepTypMap == NULL)
+        return;
+
+    /*
+     * Invalidate all cache entries.  Only the resolved local type Oid is
+     * reset; the remote names stay, so the mapping can be looked up again.
+     */
+    hash_seq_init(&status, LogicalRepTypMap);
+
+    while ((entry = (LogicalRepTyp *) hash_seq_search(&status)) != NULL)
+        entry->typoid = InvalidOid;
+}
+
+/*
+ * Free the type map cache entry data.
+ *
+ * Releases the name strings owned by the entry and resets its local type
+ * Oid; the hash table entry itself is not removed.
+ */
+static void
+logicalrep_typmap_free_entry(LogicalRepTyp *entry)
+{
+    pfree(entry->nspname);
+    pfree(entry->typname);
+
+    entry->typoid = InvalidOid;
+}
+
+/*
+ * Add new entry or update existing entry in the type map cache.
+ *
+ * The entry stores its own copies of the remote namespace and type names,
+ * allocated in LogicalRepRelMapContext.  The local type Oid is left unset
+ * and is resolved by logicalrep_typmap_getid() on first use.
+ */
+void
+logicalrep_typmap_update(LogicalRepTyp *remotetyp)
+{
+    MemoryContext oldctx;
+    LogicalRepTyp *entry;
+    bool        found;
+
+    if (LogicalRepTypMap == NULL)
+        logicalrep_relmap_init();   /* creates the type map as well */
+
+    /*
+     * HASH_ENTER returns the existing entry if present or creates a new one.
+     */
+    entry = hash_search(LogicalRepTypMap, (void *) &remotetyp->remoteid,
+                        HASH_ENTER, &found);
+
+    if (found)
+        logicalrep_typmap_free_entry(entry);
+
+    /* Make cached copy of the data */
+    entry->remoteid = remotetyp->remoteid;
+    oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
+    entry->nspname = pstrdup(remotetyp->nspname);
+    entry->typname = pstrdup(remotetyp->typname);
+    MemoryContextSwitchTo(oldctx);
+    entry->typoid = InvalidOid;
+}
+
+/*
+ * Fetch type info from the cache.
+ *
+ * Returns the local type Oid corresponding to the given remote type Oid.
+ * Oids below FirstNormalObjectId are returned unchanged after checking that
+ * the type is defined locally.  Other types are resolved by the namespace
+ * and type name stored in the type map, and the result is remembered in the
+ * map entry.  Raises an error if there is no map entry for the remote type
+ * or if no matching local type exists.
+ */
+Oid
+logicalrep_typmap_getid(Oid remoteid)
+{
+    LogicalRepTyp *entry;
+    bool        found;
+    Oid         nspoid;
+
+    /* Internal types are mapped directly. */
+    if (remoteid < FirstNormalObjectId)
+    {
+        if (!get_typisdefined(remoteid))
+            ereport(ERROR,
+                    (errmsg("builtin type %u not found", remoteid),
+                     errhint("This can be caused by having publisher with "
+                             "higher major version than subscriber")));
+        return remoteid;
+    }
+
+    if (LogicalRepTypMap == NULL)
+        logicalrep_relmap_init();
+
+    /* Try finding the mapping. */
+    entry = hash_search(LogicalRepTypMap, (void *) &remoteid,
+                        HASH_FIND, &found);
+
+    if (!found)
+        elog(ERROR, "no type map entry for remote type %u",
+             remoteid);
+
+    /* Found and mapped, return the oid. */
+    if (OidIsValid(entry->typoid))
+        return entry->typoid;
+
+    /* Otherwise, try to map to local type (a missing namespace is not an error here). */
+    nspoid = LookupExplicitNamespace(entry->nspname, true);
+    if (OidIsValid(nspoid))
+        entry->typoid = GetSysCacheOid2(TYPENAMENSP,
+                                        PointerGetDatum(entry->typname),
+                                        ObjectIdGetDatum(nspoid));
+    else
+        entry->typoid = InvalidOid;
+
+    if (!OidIsValid(entry->typoid))
+        ereport(ERROR,
+                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                 errmsg("data type \"%s.%s\" required for logical replication does not exist",
+                        entry->nspname, entry->typname)));
+
+    return entry->typoid;
+}
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
new file mode 100644
index 0000000000..7d86736444
--- /dev/null
+++ b/src/backend/replication/logical/worker.c
@@ -0,0 +1,1429 @@
+/*-------------------------------------------------------------------------
+ * worker.c
+ *     PostgreSQL logical replication worker (apply)
+ *
+ * Copyright (c) 2012-2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *    src/backend/replication/logical/worker.c
+ *
+ * NOTES
+ *    This file contains the worker which applies logical changes as they come
+ *    from remote logical replication stream.
+ *
+ *    The main worker (apply) is started by logical replication worker
+ *    launcher for every enabled subscription in a database. It uses
+ *    walsender protocol to communicate with publisher.
+ *
+ *    The apply worker may spawn additional workers (sync) for initial data
+ *    synchronization of tables.
+ *
+ *    This module includes server facing code and shares libpqwalreceiver
+ *    module with walreceiver for providing the libpq specific functionality.
+ * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "pgstat.h" +#include "funcapi.h" + +#include "access/xact.h" +#include "access/xlog_internal.h" + +#include "catalog/namespace.h" +#include "catalog/pg_subscription.h" + +#include "commands/trigger.h" + +#include "executor/executor.h" +#include "executor/nodeModifyTable.h" + +#include "libpq/pqformat.h" +#include "libpq/pqsignal.h" + +#include "mb/pg_wchar.h" + +#include "nodes/makefuncs.h" + +#include "optimizer/planner.h" + +#include "parser/parse_relation.h" + +#include "postmaster/bgworker.h" +#include "postmaster/postmaster.h" + +#include "replication/decode.h" +#include "replication/logical.h" +#include "replication/logicalproto.h" +#include "replication/logicalrelation.h" +#include "replication/logicalworker.h" +#include "replication/reorderbuffer.h" +#include "replication/origin.h" +#include "replication/snapbuild.h" +#include "replication/walreceiver.h" +#include "replication/worker_internal.h" + +#include "rewrite/rewriteHandler.h" + +#include "storage/bufmgr.h" +#include "storage/ipc.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "storage/procarray.h" + +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/datum.h" +#include "utils/fmgroids.h" +#include "utils/guc.h" +#include "utils/inval.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/timeout.h" +#include "utils/tqual.h" +#include "utils/syscache.h" + +#define NAPTIME_PER_CYCLE 1000 /* max sleep time between cycles (1s) */ + +typedef struct FlushPosition +{ + dlist_node node; + XLogRecPtr local_end; + XLogRecPtr remote_end; +} FlushPosition; + +static dlist_head lsn_mapping = DLIST_STATIC_INIT(lsn_mapping); + +typedef struct SlotErrCallbackArg +{ + LogicalRepRelation *rel; + int attnum; +} SlotErrCallbackArg; + +static MemoryContext ApplyContext = NULL; +static MemoryContext 
ApplyCacheContext = NULL;
+
+WalReceiverConn *wrconn = NULL;
+
+Subscription *MySubscription = NULL;
+bool        MySubscriptionValid = false;
+
+bool        in_remote_transaction = false;
+
+static void send_feedback(XLogRecPtr recvpos, bool force, bool requestReply);
+
+static void store_flush_position(XLogRecPtr remote_lsn);
+
+static void reread_subscription(void);
+
+/*
+ * Make sure that we started local transaction.
+ *
+ * Also switches to ApplyContext as necessary.
+ *
+ * Returns true if a new transaction was started by this call, false if we
+ * were already inside one.  When a transaction is started and the cached
+ * subscription is marked invalid, the subscription is re-read first.
+ */
+static bool
+ensure_transaction(void)
+{
+    if (IsTransactionState())
+    {
+        if (CurrentMemoryContext != ApplyContext)
+            MemoryContextSwitchTo(ApplyContext);
+        return false;
+    }
+
+    StartTransactionCommand();
+
+    if (!MySubscriptionValid)
+        reread_subscription();
+
+    MemoryContextSwitchTo(ApplyContext);
+    return true;
+}
+
+
+/*
+ * Executor state preparation for evaluation of constraint expressions,
+ * indexes and triggers.
+ *
+ * This is based on similar code in copy.c
+ *
+ * The returned EState has a one-entry range table and a single result
+ * relation, both describing rel->localrel, which must already be open.
+ */
+static EState *
+create_estate_for_relation(LogicalRepRelMapEntry *rel)
+{
+    EState     *estate;
+    ResultRelInfo *resultRelInfo;
+    RangeTblEntry *rte;
+
+    estate = CreateExecutorState();
+
+    rte = makeNode(RangeTblEntry);
+    rte->rtekind = RTE_RELATION;
+    rte->relid = RelationGetRelid(rel->localrel);
+    rte->relkind = rel->localrel->rd_rel->relkind;
+    estate->es_range_table = list_make1(rte);
+
+    resultRelInfo = makeNode(ResultRelInfo);
+    InitResultRelInfo(resultRelInfo, rel->localrel, 1, NULL, 0);
+
+    estate->es_result_relations = resultRelInfo;
+    estate->es_num_result_relations = 1;
+    estate->es_result_relation_info = resultRelInfo;
+
+    /* Triggers might need a slot */
+    if (resultRelInfo->ri_TrigDesc)
+        estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
+
+    return estate;
+}
+
+/*
+ * Executes default values for columns for which we can't map to remote
+ * relation columns.
+ *
+ * This allows us to support tables which have more columns on the downstream
+ * than on the upstream.
+ */
+static void
+slot_fill_defaults(LogicalRepRelMapEntry *rel, EState *estate,
+                   TupleTableSlot *slot)
+{
+    TupleDesc   desc = RelationGetDescr(rel->localrel);
+    int         num_phys_attrs = desc->natts;
+    int         i;
+    int         attnum,
+                num_defaults = 0;
+    int        *defmap;
+    ExprState **defexprs;
+    ExprContext *econtext;
+
+    econtext = GetPerTupleExprContext(estate);
+
+    /* We got all the data via replication, no need to evaluate anything. */
+    if (num_phys_attrs == rel->remoterel.natts)
+        return;
+
+    /*
+     * defexprs[k] is the prepared default expression for the local column
+     * with 0-based index defmap[k]; only the first num_defaults slots of
+     * either array are used.
+     */
+    defmap = (int *) palloc(num_phys_attrs * sizeof(int));
+    defexprs = (ExprState **) palloc(num_phys_attrs * sizeof(ExprState *));
+
+    for (attnum = 0; attnum < num_phys_attrs; attnum++)
+    {
+        Expr       *defexpr;
+
+        if (desc->attrs[attnum]->attisdropped)
+            continue;
+
+        if (rel->attrmap[attnum] >= 0)      /* value came from the publisher */
+            continue;
+
+        defexpr = (Expr *) build_column_default(rel->localrel, attnum + 1);
+
+        if (defexpr != NULL)
+        {
+            /* Run the expression through planner */
+            defexpr = expression_planner(defexpr);
+
+            /* Initialize executable expression */
+            defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
+            defmap[num_defaults] = attnum;
+            num_defaults++;
+        }
+
+    }
+
+    for (i = 0; i < num_defaults; i++)
+        slot->tts_values[defmap[i]] =
+            ExecEvalExpr(defexprs[i], econtext, &slot->tts_isnull[defmap[i]]);
+}
+
+/*
+ * Error callback to give more context info about type conversion failure.
+ */ +static void +slot_store_error_callback(void *arg) +{ + SlotErrCallbackArg *errarg = (SlotErrCallbackArg *) arg; + Oid remotetypoid, + localtypoid; + + if (errarg->attnum < 0) + return; + + remotetypoid = errarg->rel->atttyps[errarg->attnum]; + localtypoid = logicalrep_typmap_getid(remotetypoid); + errcontext("processing remote data for replication target relation \"%s.%s\" column \"%s\", " + "remote type %s, local type %s", + errarg->rel->nspname, errarg->rel->relname, + errarg->rel->attnames[errarg->attnum], + format_type_be(remotetypoid), + format_type_be(localtypoid)); +} + +/* + * Store data in C string form into slot. + * This is similar to BuildTupleFromCStrings but TupleTableSlot fits our + * use better. + */ +static void +slot_store_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, + char **values) +{ + int natts = slot->tts_tupleDescriptor->natts; + int i; + SlotErrCallbackArg errarg; + ErrorContextCallback errcallback; + + ExecClearTuple(slot); + + /* Push callback + info on the error context stack */ + errarg.rel = &rel->remoterel; + errarg.attnum = -1; + errcallback.callback = slot_store_error_callback; + errcallback.arg = (void *) &errarg; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* Call the "in" function for each non-dropped attribute */ + for (i = 0; i < natts; i++) + { + Form_pg_attribute att = slot->tts_tupleDescriptor->attrs[i]; + int remoteattnum = rel->attrmap[i]; + + if (!att->attisdropped && remoteattnum >= 0 && + values[remoteattnum] != NULL) + { + Oid typinput; + Oid typioparam; + + errarg.attnum = remoteattnum; + + getTypeInputInfo(att->atttypid, &typinput, &typioparam); + slot->tts_values[i] = OidInputFunctionCall(typinput, + values[remoteattnum], + typioparam, + att->atttypmod); + slot->tts_isnull[i] = false; + } + else + { + /* + * We assign NULL to dropped attributes, NULL values, and missing + * values (missing values should be later filled using + * slot_fill_defaults). 
+ */ + slot->tts_values[i] = (Datum) 0; + slot->tts_isnull[i] = true; + } + } + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + + ExecStoreVirtualTuple(slot); +} + +/* + * Modify slot with user data provided as C strigs. + * This is somewhat similar to heap_modify_tuple but also calls the type + * input fuction on the user data as the input is the text representation + * of the types. + */ +static void +slot_modify_cstrings(TupleTableSlot *slot, LogicalRepRelMapEntry *rel, + char **values, bool *replaces) +{ + int natts = slot->tts_tupleDescriptor->natts; + int i; + SlotErrCallbackArg errarg; + ErrorContextCallback errcallback; + + slot_getallattrs(slot); + ExecClearTuple(slot); + + /* Push callback + info on the error context stack */ + errarg.rel = &rel->remoterel; + errarg.attnum = -1; + errcallback.callback = slot_store_error_callback; + errcallback.arg = (void *) &errarg; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* Call the "in" function for each replaced attribute */ + for (i = 0; i < natts; i++) + { + Form_pg_attribute att = slot->tts_tupleDescriptor->attrs[i]; + int remoteattnum = rel->attrmap[i]; + + if (remoteattnum >= 0 && !replaces[remoteattnum]) + continue; + + if (remoteattnum >= 0 && values[remoteattnum] != NULL) + { + Oid typinput; + Oid typioparam; + + errarg.attnum = remoteattnum; + + getTypeInputInfo(att->atttypid, &typinput, &typioparam); + slot->tts_values[i] = OidInputFunctionCall(typinput, values[i], + typioparam, + att->atttypmod); + slot->tts_isnull[i] = false; + } + else + { + slot->tts_values[i] = (Datum) 0; + slot->tts_isnull[i] = true; + } + } + + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + + ExecStoreVirtualTuple(slot); +} + +/* + * Handle BEGIN message. 
+ */
+static void
+apply_handle_begin(StringInfo s)
+{
+    LogicalRepBeginData begin_data;
+
+    logicalrep_read_begin(s, &begin_data);
+
+    replorigin_session_origin_timestamp = begin_data.committime;
+    replorigin_session_origin_lsn = begin_data.final_lsn;
+
+    in_remote_transaction = true;
+
+    pgstat_report_activity(STATE_RUNNING, NULL);
+}
+
+/*
+ * Handle COMMIT message.
+ *
+ * A local transaction is committed only if one is open, in which case the
+ * remote end position of the commit is recorded via store_flush_position().
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_commit(StringInfo s)
+{
+    LogicalRepCommitData commit_data;
+
+    logicalrep_read_commit(s, &commit_data);
+
+    /* COMMIT must match the values announced by the preceding BEGIN. */
+    Assert(commit_data.commit_lsn == replorigin_session_origin_lsn);
+    Assert(commit_data.committime == replorigin_session_origin_timestamp);
+
+    if (IsTransactionState())
+    {
+        CommitTransactionCommand();
+
+        store_flush_position(commit_data.end_lsn);
+    }
+
+    in_remote_transaction = false;
+
+    pgstat_report_activity(STATE_IDLE, NULL);
+}
+
+/*
+ * Handle ORIGIN message.
+ *
+ * The message content is not used; only its position in the stream is
+ * validated.
+ *
+ * TODO, support tracking of multiple origins
+ */
+static void
+apply_handle_origin(StringInfo s)
+{
+    /*
+     * ORIGIN message can only come inside remote transaction and before
+     * any actual writes.
+     */
+    if (!in_remote_transaction || IsTransactionState())
+        ereport(ERROR,
+                (errcode(ERRCODE_PROTOCOL_VIOLATION),
+                 errmsg("ORIGIN message sent out of order")));
+}
+
+/*
+ * Handle RELATION message.
+ *
+ * Note we don't do validation against local schema here. The validation
+ * against local schema is postponed until first change for given relation
+ * comes as we only care about it when applying changes for it anyway and we
+ * do less locking this way.
+ */
+static void
+apply_handle_relation(StringInfo s)
+{
+    LogicalRepRelation *rel;
+
+    rel = logicalrep_read_rel(s);
+    logicalrep_relmap_update(rel);
+}
+
+/*
+ * Handle TYPE message.
+ *
+ * Note we don't do local mapping here, that's done when the type is
+ * actually used.
+ */
+static void
+apply_handle_type(StringInfo s)
+{
+	LogicalRepTyp typ;
+
+	/* Decode the TYPE message and pass the result to the type map. */
+	logicalrep_read_typ(s, &typ);
+	logicalrep_typmap_update(&typ);
+}
+
+/*
+ * Get replica identity index or if it is not defined a primary key.
+ *
+ * The replica identity index takes precedence; the primary key index is
+ * only consulted when no replica identity index is defined.
+ *
+ * If neither is defined, returns InvalidOid
+ */
+static Oid
+GetRelationIdentityOrPK(Relation rel)
+{
+	Oid idxoid;
+
+	idxoid = RelationGetReplicaIndex(rel);
+
+	if (!OidIsValid(idxoid))
+		idxoid = RelationGetPrimaryKeyIndex(rel);
+
+	return idxoid;
+}
+
+/*
+ * Handle INSERT message.
+ *
+ * Reads the new tuple from the message, stores it in a slot that uses the
+ * local relation's tuple descriptor, and inserts it with
+ * ExecSimpleRelationInsert().  The tuple conversion runs in the per-tuple
+ * memory context of the executor state.
+ */
+static void
+apply_handle_insert(StringInfo s)
+{
+	LogicalRepRelMapEntry *rel;
+	LogicalRepTupleData newtup;
+	LogicalRepRelId relid;
+	EState *estate;
+	TupleTableSlot *remoteslot;
+	MemoryContext oldctx;
+
+	ensure_transaction();
+
+	relid = logicalrep_read_insert(s, &newtup);
+	rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+	/* Initialize the executor state. */
+	estate = create_estate_for_relation(rel);
+	remoteslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+
+	/* Process and store remote tuple in the slot */
+	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+	slot_store_cstrings(remoteslot, rel, newtup.values);
+	slot_fill_defaults(rel, estate, remoteslot);
+	MemoryContextSwitchTo(oldctx);
+
+	PushActiveSnapshot(GetTransactionSnapshot());
+	ExecOpenIndices(estate->es_result_relation_info, false);
+
+	/* Do the insert. */
+	ExecSimpleRelationInsert(estate, remoteslot);
+
+	/* Cleanup. */
+	ExecCloseIndices(estate->es_result_relation_info);
+	PopActiveSnapshot();
+	ExecResetTupleTable(estate->es_tupleTable, false);
+	FreeExecutorState(estate);
+
+	logicalrep_rel_close(rel, NoLock);
+
+	CommandCounterIncrement();
+}
+
+/*
+ * Check if the logical replication relation is updatable and throw
+ * appropriate error if it isn't.
+ *
+ * Two different errors are raised when rel->updatable is false: if the local
+ * relation has a replica identity or primary key index, the problem is
+ * reported as the publisher not sending the expected identity column;
+ * otherwise the local relation is reported as having no usable key.
+ */
+static void
+check_relation_updatable(LogicalRepRelMapEntry *rel)
+{
+	/* Updatable, no error. */
+	if (rel->updatable)
+		return;
+
+	/*
+	 * We are in error mode so it's fine this is somewhat slow.
+	 * It's better to give user correct error.
+	 */
+	if (OidIsValid(GetRelationIdentityOrPK(rel->localrel)))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("publisher does not send replica identity column "
+						"expected by the logical replication target relation \"%s.%s\"",
+						rel->remoterel.nspname, rel->remoterel.relname)));
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+			 errmsg("logical replication target relation \"%s.%s\" has "
+					"neither REPLICA IDENTITY index nor PRIMARY "
+					"KEY and published relation does not have "
+					"REPLICA IDENTITY FULL",
+					rel->remoterel.nspname, rel->remoterel.relname)));
+}
+
+/*
+ * Handle UPDATE message.
+ *
+ * The old tuple from the message (or the new tuple, when the message carries
+ * no old tuple) is used as the search key to locate the local row, either
+ * through the replica identity / primary key index or by sequential scan.
+ * If the row is found, the changed columns of the new tuple are applied on
+ * top of the local tuple and the row is updated; if not, the miss is only
+ * logged at DEBUG1.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_update(StringInfo s)
+{
+	LogicalRepRelMapEntry *rel;
+	LogicalRepRelId relid;
+	Oid idxoid;
+	EState *estate;
+	EPQState epqstate;
+	LogicalRepTupleData oldtup;
+	LogicalRepTupleData newtup;
+	bool has_oldtup;
+	TupleTableSlot *localslot;
+	TupleTableSlot *remoteslot;
+	bool found;
+	MemoryContext oldctx;
+
+	ensure_transaction();
+
+	relid = logicalrep_read_update(s, &has_oldtup, &oldtup,
+								   &newtup);
+	rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+	/* Check if we can do the update. */
+	check_relation_updatable(rel);
+
+	/* Initialize the executor state. */
+	estate = create_estate_for_relation(rel);
+	remoteslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+	localslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+	EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+	PushActiveSnapshot(GetTransactionSnapshot());
+	ExecOpenIndices(estate->es_result_relation_info, false);
+
+	/* Build the search tuple. */
+	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+	slot_store_cstrings(remoteslot, rel,
+						has_oldtup ? oldtup.values : newtup.values);
+	MemoryContextSwitchTo(oldctx);
+
+	/*
+	 * Try to find tuple using either replica identity index, primary key
+	 * or if needed, sequential scan.
+	 */
+	idxoid = GetRelationIdentityOrPK(rel->localrel);
+	Assert(OidIsValid(idxoid) ||
+		   (rel->remoterel.replident == REPLICA_IDENTITY_FULL && has_oldtup));
+
+	if (OidIsValid(idxoid))
+		found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+											 LockTupleExclusive,
+											 remoteslot, localslot);
+	else
+		found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+										 remoteslot, localslot);
+
+	ExecClearTuple(remoteslot);
+
+	/*
+	 * Tuple found.
+	 *
+	 * Note this will fail if there are other conflicting unique indexes.
+	 */
+	if (found)
+	{
+		/* Process and store remote tuple in the slot */
+		oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+		ExecStoreTuple(localslot->tts_tuple, remoteslot, InvalidBuffer, false);
+		slot_modify_cstrings(remoteslot, rel, newtup.values, newtup.changed);
+		MemoryContextSwitchTo(oldctx);
+
+		EvalPlanQualSetSlot(&epqstate, remoteslot);
+
+		/* Do the actual update. */
+		ExecSimpleRelationUpdate(estate, &epqstate, localslot, remoteslot);
+	}
+	else
+	{
+		/*
+		 * The tuple to be updated could not be found.
+		 *
+		 * TODO what to do here, change the log level to LOG perhaps?
+		 */
+		elog(DEBUG1,
+			 "logical replication did not find row for update "
+			 "in replication target relation \"%s\"",
+			 RelationGetRelationName(rel->localrel));
+	}
+
+	/* Cleanup. */
+	ExecCloseIndices(estate->es_result_relation_info);
+	PopActiveSnapshot();
+	EvalPlanQualEnd(&epqstate);
+	ExecResetTupleTable(estate->es_tupleTable, false);
+	FreeExecutorState(estate);
+
+	logicalrep_rel_close(rel, NoLock);
+
+	CommandCounterIncrement();
+}
+
+/*
+ * Handle DELETE message.
+ *
+ * The old tuple read from the message is used as the search key to locate
+ * the local row, either through the replica identity / primary key index or
+ * by sequential scan.  A row that cannot be found is only logged at DEBUG1.
+ *
+ * TODO: FDW support
+ */
+static void
+apply_handle_delete(StringInfo s)
+{
+	LogicalRepRelMapEntry *rel;
+	LogicalRepTupleData oldtup;
+	LogicalRepRelId relid;
+	Oid idxoid;
+	EState *estate;
+	EPQState epqstate;
+	TupleTableSlot *remoteslot;
+	TupleTableSlot *localslot;
+	bool found;
+	MemoryContext oldctx;
+
+	ensure_transaction();
+
+	relid = logicalrep_read_delete(s, &oldtup);
+	rel = logicalrep_rel_open(relid, RowExclusiveLock);
+
+	/* Check if we can do the delete. */
+	check_relation_updatable(rel);
+
+	/* Initialize the executor state. */
+	estate = create_estate_for_relation(rel);
+	remoteslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(remoteslot, RelationGetDescr(rel->localrel));
+	localslot = ExecInitExtraTupleSlot(estate);
+	ExecSetSlotDescriptor(localslot, RelationGetDescr(rel->localrel));
+	EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
+
+	PushActiveSnapshot(GetTransactionSnapshot());
+	ExecOpenIndices(estate->es_result_relation_info, false);
+
+	/* Build the search tuple from the old tuple sent by the publisher. */
+	oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+	slot_store_cstrings(remoteslot, rel, oldtup.values);
+	MemoryContextSwitchTo(oldctx);
+
+	/*
+	 * Try to find tuple using either replica identity index, primary key
+	 * or if needed, sequential scan.
+	 */
+	idxoid = GetRelationIdentityOrPK(rel->localrel);
+	Assert(OidIsValid(idxoid) ||
+		   (rel->remoterel.replident == REPLICA_IDENTITY_FULL));
+
+	if (OidIsValid(idxoid))
+		found = RelationFindReplTupleByIndex(rel->localrel, idxoid,
+											 LockTupleExclusive,
+											 remoteslot, localslot);
+	else
+		found = RelationFindReplTupleSeq(rel->localrel, LockTupleExclusive,
+										 remoteslot, localslot);
+	/* If found delete it. */
+	if (found)
+	{
+		EvalPlanQualSetSlot(&epqstate, localslot);
+
+		/* Do the actual delete. */
+		ExecSimpleRelationDelete(estate, &epqstate, localslot);
+	}
+	else
+	{
+		/* The tuple to be deleted could not be found. */
+		ereport(DEBUG1,
+				(errmsg("logical replication could not find row for delete "
+						"in replication target %s",
+						RelationGetRelationName(rel->localrel))));
+	}
+
+	/* Cleanup. */
+	ExecCloseIndices(estate->es_result_relation_info);
+	PopActiveSnapshot();
+	EvalPlanQualEnd(&epqstate);
+	ExecResetTupleTable(estate->es_tupleTable, false);
+	FreeExecutorState(estate);
+
+	logicalrep_rel_close(rel, NoLock);
+
+	CommandCounterIncrement();
+}
+
+
+/*
+ * Logical replication protocol message dispatcher.
+ *
+ * Consumes the one-byte message type from the front of 's' and calls the
+ * matching apply_handle_* function with the rest of the message.  An
+ * unknown type byte raises ERROR with ERRCODE_PROTOCOL_VIOLATION.
+ */
+static void
+apply_dispatch(StringInfo s)
+{
+	char action = pq_getmsgbyte(s);
+
+	switch (action)
+	{
+			/* BEGIN */
+		case 'B':
+			apply_handle_begin(s);
+			break;
+			/* COMMIT */
+		case 'C':
+			apply_handle_commit(s);
+			break;
+			/* INSERT */
+		case 'I':
+			apply_handle_insert(s);
+			break;
+			/* UPDATE */
+		case 'U':
+			apply_handle_update(s);
+			break;
+			/* DELETE */
+		case 'D':
+			apply_handle_delete(s);
+			break;
+			/* RELATION */
+		case 'R':
+			apply_handle_relation(s);
+			break;
+			/* TYPE */
+		case 'Y':
+			apply_handle_type(s);
+			break;
+			/* ORIGIN */
+		case 'O':
+			apply_handle_origin(s);
+			break;
+		default:
+			ereport(ERROR,
+					(errcode(ERRCODE_PROTOCOL_VIOLATION),
+					 errmsg("invalid logical replication message type %c", action)));
+	}
+}
+
+/*
+ * Figure out which write/flush positions to report to the walsender process.
+ *
+ * We can't simply report back the last LSN the walsender sent us because the
+ * local transaction might not yet be flushed to disk locally. Instead we
+ * build a list that associates local with remote LSNs for every commit. When
+ * reporting back the flush position to the sender we iterate that list and
+ * check which entries on it are already locally flushed. Those we can report
+ * as having been flushed.
+ *
+ * The have_pending_txes is true if there are outstanding transactions that
+ * need to be flushed.
+ */
+static void
+get_flush_position(XLogRecPtr *write, XLogRecPtr *flush,
+				   bool *have_pending_txes)
+{
+	dlist_mutable_iter iter;
+	XLogRecPtr local_flush = GetFlushRecPtr();
+
+	*write = InvalidXLogRecPtr;
+	*flush = InvalidXLogRecPtr;
+
+	dlist_foreach_modify(iter, &lsn_mapping)
+	{
+		FlushPosition *pos =
+			dlist_container(FlushPosition, node, iter.cur);
+
+		*write = pos->remote_end;
+
+		if (pos->local_end <= local_flush)
+		{
+			/* Locally flushed: report it and drop the entry from the list. */
+			*flush = pos->remote_end;
+			dlist_delete(iter.cur);
+			pfree(pos);
+		}
+		else
+		{
+			/*
+			 * Don't want to uselessly iterate over the rest of the list which
+			 * could potentially be long. Instead get the last element and
+			 * grab the write position from there.
+			 */
+			pos = dlist_tail_element(FlushPosition, node,
+									 &lsn_mapping);
+			*write = pos->remote_end;
+			*have_pending_txes = true;
+			return;
+		}
+	}
+
+	*have_pending_txes = !dlist_is_empty(&lsn_mapping);
+}
+
+/*
+ * Store current remote/local lsn pair in the tracking list.
+ *
+ * The entry is allocated in ApplyCacheContext; on return the current memory
+ * context is ApplyContext, regardless of what it was on entry.
+ */
+static void
+store_flush_position(XLogRecPtr remote_lsn)
+{
+	FlushPosition *flushpos;
+
+	/* Need to do this in permanent context */
+	MemoryContextSwitchTo(ApplyCacheContext);
+
+	/* Track commit lsn */
+	flushpos = (FlushPosition *) palloc(sizeof(FlushPosition));
+	flushpos->local_end = XactLastCommitEnd;
+	flushpos->remote_end = remote_lsn;
+
+	dlist_push_tail(&lsn_mapping, &flushpos->node);
+	MemoryContextSwitchTo(ApplyContext);
+}
+
+
+/*
+ * Update statistics of the worker.
+ *
+ * The reply_* fields are only updated when 'reply' is true.
+ */
+static void
+UpdateWorkerStats(XLogRecPtr last_lsn, TimestampTz send_time, bool reply)
+{
+	MyLogicalRepWorker->last_lsn = last_lsn;
+	MyLogicalRepWorker->last_send_time = send_time;
+	MyLogicalRepWorker->last_recv_time = GetCurrentTimestamp();
+	if (reply)
+	{
+		MyLogicalRepWorker->reply_lsn = last_lsn;
+		MyLogicalRepWorker->reply_time = send_time;
+	}
+}
+
+/*
+ * Apply main loop.
+ *
+ * Each pass drains all data currently available from the publisher,
+ * dispatching 'w' (data) messages to apply_dispatch() and answering 'k'
+ * (keepalive) messages, then sends feedback, resets ApplyContext and waits
+ * on the latch/socket.  The loop ends on SIGTERM or end of stream.
+ */
+static void
+ApplyLoop(void)
+{
+	XLogRecPtr last_received = InvalidXLogRecPtr;
+
+	/*
+	 * These must live outside the loop: they track the time of the last
+	 * message received across iterations.  Re-initializing them on every
+	 * pass would restart the timeout clock each time, so the
+	 * wal_receiver_timeout checks below would effectively never trigger.
+	 */
+	TimestampTz last_recv_timestamp = GetCurrentTimestamp();
+	bool ping_sent = false;
+
+	/* Init the ApplyContext which we use for easier cleanup. */
+	ApplyContext = AllocSetContextCreate(TopMemoryContext,
+										 "ApplyContext",
+										 ALLOCSET_DEFAULT_MINSIZE,
+										 ALLOCSET_DEFAULT_INITSIZE,
+										 ALLOCSET_DEFAULT_MAXSIZE);
+
+	/* mark as idle, before starting to loop */
+	pgstat_report_activity(STATE_IDLE, NULL);
+
+	while (!got_SIGTERM)
+	{
+		pgsocket fd = PGINVALID_SOCKET;
+		int rc;
+		int len;
+		char *buf = NULL;
+		bool endofstream = false;
+
+		MemoryContextSwitchTo(ApplyContext);
+
+		len = walrcv_receive(wrconn, &buf, &fd);
+
+		if (len != 0)
+		{
+			/* Process the data */
+			for (;;)
+			{
+				CHECK_FOR_INTERRUPTS();
+
+				if (len == 0)
+				{
+					break;
+				}
+				else if (len < 0)
+				{
+					ereport(LOG,
+							(errmsg("data stream from publisher has ended")));
+					endofstream = true;
+					break;
+				}
+				else
+				{
+					int c;
+					StringInfoData s;
+
+					/* Reset timeout. */
+					last_recv_timestamp = GetCurrentTimestamp();
+					ping_sent = false;
+
+					/* Ensure we are reading the data into our memory context. */
+					MemoryContextSwitchTo(ApplyContext);
+
+					/* Wrap the receive buffer in a StringInfo for parsing. */
+					s.data = buf;
+					s.len = len;
+					s.cursor = 0;
+					s.maxlen = -1;
+
+					c = pq_getmsgbyte(&s);
+
+					if (c == 'w')
+					{
+						XLogRecPtr start_lsn;
+						XLogRecPtr end_lsn;
+						TimestampTz send_time;
+
+						start_lsn = pq_getmsgint64(&s);
+						end_lsn = pq_getmsgint64(&s);
+						send_time =
+							IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+
+						if (last_received < start_lsn)
+							last_received = start_lsn;
+
+						if (last_received < end_lsn)
+							last_received = end_lsn;
+
+						UpdateWorkerStats(last_received, send_time, false);
+
+						apply_dispatch(&s);
+					}
+					else if (c == 'k')
+					{
+						XLogRecPtr endpos;
+						TimestampTz timestamp;
+						bool reply_requested;
+
+						endpos = pq_getmsgint64(&s);
+						timestamp =
+							IntegerTimestampToTimestampTz(pq_getmsgint64(&s));
+						reply_requested = pq_getmsgbyte(&s);
+
+						send_feedback(endpos, reply_requested, false);
+						UpdateWorkerStats(last_received, timestamp, true);
+					}
+					/* other message types are purposefully ignored */
+				}
+
+				len = walrcv_receive(wrconn, &buf, &fd);
+			}
+		}
+
+		if (!in_remote_transaction)
+		{
+			/*
+			 * If we didn't get any transactions for a while there might be
+			 * unconsumed invalidation messages in the queue, consume them now.
+			 */
+			StartTransactionCommand();
+			/* Check for subscription change */
+			if (!MySubscriptionValid)
+				reread_subscription();
+			CommitTransactionCommand();
+		}
+
+		/* confirm all writes at once */
+		send_feedback(last_received, false, false);
+
+		/* Cleanup the memory. */
+		MemoryContextResetAndDeleteChildren(ApplyContext);
+		MemoryContextSwitchTo(TopMemoryContext);
+
+		/* Check if we need to exit the streaming loop. */
+		if (endofstream)
+			break;
+
+		/*
+		 * Wait for more data or latch.
+		 */
+		rc = WaitLatchOrSocket(&MyProc->procLatch,
+							   WL_SOCKET_READABLE | WL_LATCH_SET |
+							   WL_TIMEOUT | WL_POSTMASTER_DEATH,
+							   fd, NAPTIME_PER_CYCLE,
+							   WAIT_EVENT_LOGICAL_APPLY_MAIN);
+
+		/* Emergency bailout if postmaster has died */
+		if (rc & WL_POSTMASTER_DEATH)
+			proc_exit(1);
+
+		if (rc & WL_TIMEOUT)
+		{
+			/*
+			 * We didn't receive anything new. If we haven't heard
+			 * anything from the server for more than
+			 * wal_receiver_timeout / 2, ping the server. Also, if
+			 * it's been longer than wal_receiver_status_interval
+			 * since the last update we sent, send a status update to
+			 * the publisher anyway, to report any progress in applying
+			 * WAL.
+			 */
+			bool requestReply = false;
+
+			/*
+			 * Check if time since last receive from the publisher has
+			 * reached the configured limit.
+			 */
+			if (wal_receiver_timeout > 0)
+			{
+				TimestampTz now = GetCurrentTimestamp();
+				TimestampTz timeout;
+
+				timeout =
+					TimestampTzPlusMilliseconds(last_recv_timestamp,
+												wal_receiver_timeout);
+
+				if (now >= timeout)
+					ereport(ERROR,
+							(errmsg("terminating logical replication worker due to timeout")));
+
+				/*
+				 * We didn't receive anything new, for half of
+				 * receiver replication timeout. Ping the server.
+				 */
+				if (!ping_sent)
+				{
+					timeout = TimestampTzPlusMilliseconds(last_recv_timestamp,
+														  (wal_receiver_timeout / 2));
+					if (now >= timeout)
+					{
+						requestReply = true;
+						ping_sent = true;
+					}
+				}
+			}
+
+			send_feedback(last_received, requestReply, requestReply);
+		}
+
+		ResetLatch(&MyProc->procLatch);
+	}
+}
+
+/*
+ * Send a Standby Status Update message to server.
+ *
+ * 'recvpos' is the latest LSN we've received data to, force is set if we need
+ * to send a response to avoid timeouts.
+ */
+static void
+send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
+{
+	/* State kept across calls: reusable message buffer and last reported values. */
+	static StringInfo reply_message = NULL;
+	static TimestampTz send_time = 0;
+
+	static XLogRecPtr last_recvpos = InvalidXLogRecPtr;
+	static XLogRecPtr last_writepos = InvalidXLogRecPtr;
+	static XLogRecPtr last_flushpos = InvalidXLogRecPtr;
+
+	XLogRecPtr writepos;
+	XLogRecPtr flushpos;
+	TimestampTz now;
+	bool have_pending_txes;
+
+	/*
+	 * If the user doesn't want status to be reported to the publisher, be
+	 * sure to exit before doing anything at all.
+	 */
+	if (!force && wal_receiver_status_interval <= 0)
+		return;
+
+	/* It's legal to not pass a recvpos */
+	if (recvpos < last_recvpos)
+		recvpos = last_recvpos;
+
+	get_flush_position(&writepos, &flushpos, &have_pending_txes);
+
+	/*
+	 * No outstanding transactions to flush, we can report the latest
+	 * received position. This is important for synchronous replication.
+	 */
+	if (!have_pending_txes)
+		flushpos = writepos = recvpos;
+
+	/* Never report a position lower than one we reported before. */
+	if (writepos < last_writepos)
+		writepos = last_writepos;
+
+	if (flushpos < last_flushpos)
+		flushpos = last_flushpos;
+
+	now = GetCurrentTimestamp();
+
+	/* if we've already reported everything we're good */
+	if (!force &&
+		writepos == last_writepos &&
+		flushpos == last_flushpos &&
+		!TimestampDifferenceExceeds(send_time, now,
+									wal_receiver_status_interval * 1000))
+		return;
+	send_time = now;
+
+	if (!reply_message)
+	{
+		/* First call: allocate the buffer in the long-lived context. */
+		MemoryContext oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+		reply_message = makeStringInfo();
+		MemoryContextSwitchTo(oldctx);
+	}
+	else
+		resetStringInfo(reply_message);
+
+	pq_sendbyte(reply_message, 'r');
+	pq_sendint64(reply_message, recvpos); /* write */
+	pq_sendint64(reply_message, flushpos); /* flush */
+	pq_sendint64(reply_message, writepos); /* apply */
+	pq_sendint64(reply_message, now); /* sendTime */
+	pq_sendbyte(reply_message, requestReply); /* replyRequested */
+
+	elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
+		 force,
+		 (uint32) (recvpos >> 32), (uint32) recvpos,
+		 (uint32) (writepos >> 32), (uint32) writepos,
+		 (uint32) (flushpos >> 32), (uint32) flushpos
+		);
+
+	walrcv_send(wrconn, reply_message->data, reply_message->len);
+
+	if (recvpos > last_recvpos)
+		last_recvpos = recvpos;
+	if (writepos > last_writepos)
+		last_writepos = writepos;
+	if (flushpos > last_flushpos)
+		last_flushpos = flushpos;
+}
+
+
+/*
+ * Reread subscription info and exit on change.
+ *
+ * The worker exits (so that it can be restarted with fresh settings, or
+ * stay stopped) when the subscription was removed or disabled, or when its
+ * connection string or publication list changed.  Otherwise the freshly
+ * loaded subscription replaces MySubscription and is marked valid.
+ */
+static void
+reread_subscription(void)
+{
+	MemoryContext oldctx;
+	Subscription *newsub;
+
+	/* Ensure allocations in permanent context. */
+	oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+
+	newsub = GetSubscription(MyLogicalRepWorker->subid, true);
+
+	/*
+	 * Exit if the subscription was removed.
+	 * This normally should not happen as the worker gets killed
+	 * during DROP SUBSCRIPTION.
+	 *
+	 * This must be checked first: GetSubscription() is called with
+	 * missing_ok = true and may return NULL, and every check below
+	 * dereferences newsub.
+	 */
+	if (!newsub)
+	{
+		ereport(LOG,
+				(errmsg("logical replication worker for subscription \"%s\" will "
+						"stop because the subscription was removed",
+						MySubscription->name)));
+
+		walrcv_disconnect(wrconn);
+		proc_exit(0);
+	}
+
+	/*
+	 * Exit if connection string was changed. The launcher will start
+	 * new worker.
+	 */
+	if (strcmp(newsub->conninfo, MySubscription->conninfo) != 0)
+	{
+		ereport(LOG,
+				(errmsg("logical replication worker for subscription \"%s\" will "
+						"restart because the connection information was changed",
+						MySubscription->name)));
+
+		walrcv_disconnect(wrconn);
+		proc_exit(0);
+	}
+
+	/*
+	 * Exit if publication list was changed. The launcher will start
+	 * new worker.
+	 */
+	if (!equal(newsub->publications, MySubscription->publications))
+	{
+		ereport(LOG,
+				(errmsg("logical replication worker for subscription \"%s\" will "
+						"restart because subscription's publications were changed",
+						MySubscription->name)));
+
+		walrcv_disconnect(wrconn);
+		proc_exit(0);
+	}
+
+	/*
+	 * Exit if the subscription was disabled.
+	 * This normally should not happen as the worker gets killed
+	 * during ALTER SUBSCRIPTION ... DISABLE.
+	 */
+	if (!newsub->enabled)
+	{
+		ereport(LOG,
+				(errmsg("logical replication worker for subscription \"%s\" will "
+						"stop because the subscription was disabled",
+						MySubscription->name)));
+
+		walrcv_disconnect(wrconn);
+		proc_exit(0);
+	}
+
+	/* Check for other changes that should never happen too. */
+	if (newsub->dbid != MySubscription->dbid ||
+		strcmp(newsub->name, MySubscription->name) != 0 ||
+		strcmp(newsub->slotname, MySubscription->slotname) != 0)
+	{
+		elog(ERROR, "subscription %u changed unexpectedly",
+			 MyLogicalRepWorker->subid);
+	}
+
+	/* Clean old subscription info and switch to new one. */
+	FreeSubscription(MySubscription);
+	MySubscription = newsub;
+
+	MemoryContextSwitchTo(oldctx);
+
+	MySubscriptionValid = true;
+}
+
+/*
+ * Callback from subscription syscache invalidation.
+ *
+ * Only flags the cached subscription as stale; the apply loop rereads it
+ * later via reread_subscription().
+ */
+static void
+subscription_change_cb(Datum arg, int cacheid, uint32 hashvalue)
+{
+	MySubscriptionValid = false;
+}
+
+
+/* Logical Replication Apply worker entry point */
+void
+ApplyWorkerMain(Datum main_arg)
+{
+	/* The slot number is a plain int, not an Oid, so decode it as int32. */
+	int worker_slot = DatumGetInt32(main_arg);
+	MemoryContext oldctx;
+	char originname[NAMEDATALEN];
+	RepOriginId originid;
+	XLogRecPtr origin_startpos;
+	char *err;
+	int server_version;
+	TimeLineID startpointTLI;
+	WalRcvStreamOptions options;
+
+	/* Attach to slot */
+	logicalrep_worker_attach(worker_slot);
+
+	/* Setup signal handling */
+	pqsignal(SIGTERM, logicalrep_worker_sigterm);
+	BackgroundWorkerUnblockSignals();
+
+	/* Initialise stats to a sanish value */
+	MyLogicalRepWorker->last_send_time = MyLogicalRepWorker->last_recv_time =
+		MyLogicalRepWorker->reply_time = GetCurrentTimestamp();
+
+	/* Make it easy to identify our processes. */
+	SetConfigOption("application_name", MyBgworkerEntry->bgw_name,
+					PGC_USERSET, PGC_S_SESSION);
+
+	/* Load the libpq-specific functions */
+	load_file("libpqwalreceiver", false);
+
+	Assert(CurrentResourceOwner == NULL);
+	CurrentResourceOwner = ResourceOwnerCreate(NULL,
+											   "logical replication apply");
+
+	/* Run as replica session replication role. */
+	SetConfigOption("session_replication_role", "replica",
+					PGC_SUSET, PGC_S_OVERRIDE);
+
+	/* Connect to our database. */
+	BackgroundWorkerInitializeConnectionByOid(MyLogicalRepWorker->dbid,
+											  MyLogicalRepWorker->userid);
+
+	/* Load the subscription into persistent memory context. */
+	CreateCacheMemoryContext();
+	ApplyCacheContext = AllocSetContextCreate(CacheMemoryContext,
+											  "ApplyCacheContext",
+											  ALLOCSET_DEFAULT_SIZES);
+	StartTransactionCommand();
+	oldctx = MemoryContextSwitchTo(ApplyCacheContext);
+	MySubscription = GetSubscription(MyLogicalRepWorker->subid, false);
+	MySubscriptionValid = true;
+	MemoryContextSwitchTo(oldctx);
+
+	if (!MySubscription->enabled)
+	{
+		ereport(LOG,
+				(errmsg("logical replication worker for subscription \"%s\" will not "
+						"start because the subscription was disabled during startup",
+						MySubscription->name)));
+
+		proc_exit(0);
+	}
+
+	/* Keep us informed about subscription changes. */
+	CacheRegisterSyscacheCallback(SUBSCRIPTIONOID,
+								  subscription_change_cb,
+								  (Datum) 0);
+
+	ereport(LOG,
+			(errmsg("logical replication apply for subscription \"%s\" has started",
+					MySubscription->name)));
+
+	/* Setup replication origin tracking. */
+	snprintf(originname, sizeof(originname), "pg_%u", MySubscription->oid);
+	originid = replorigin_by_name(originname, true);
+	if (!OidIsValid(originid))
+		originid = replorigin_create(originname);
+	replorigin_session_setup(originid);
+	replorigin_session_origin = originid;
+	origin_startpos = replorigin_session_get_progress(false);
+
+	CommitTransactionCommand();
+
+	/* Connect to the origin and start the replication. */
+	elog(DEBUG1, "connecting to publisher using connection string \"%s\"",
+		 MySubscription->conninfo);
+	wrconn = walrcv_connect(MySubscription->conninfo, true,
+							MySubscription->name, &err);
+	if (wrconn == NULL)
+		ereport(ERROR,
+				(errmsg("could not connect to the publisher: %s", err)));
+
+	/*
+	 * We don't really use the output identify_system for anything
+	 * but it does some initializations on the upstream so let's still
+	 * call it.
+	 */
+	(void) walrcv_identify_system(wrconn, &startpointTLI, &server_version);
+
+	/* Build logical replication streaming options. */
+	options.logical = true;
+	options.startpoint = origin_startpos;
+	options.slotname = MySubscription->slotname;
+	options.proto.logical.proto_version = LOGICALREP_PROTO_VERSION_NUM;
+	options.proto.logical.publication_names = MySubscription->publications;
+
+	/* Start streaming from the slot. */
+	walrcv_startstreaming(wrconn, &options);
+
+	/* Run the main loop. */
+	ApplyLoop();
+
+	walrcv_disconnect(wrconn);
+
+	/* We should only get here if we received SIGTERM */
+	proc_exit(0);
+}
diff --git a/src/backend/replication/pgoutput/Makefile b/src/backend/replication/pgoutput/Makefile
new file mode 100644
index 0000000000..1b68e2b4b6
--- /dev/null
+++ b/src/backend/replication/pgoutput/Makefile
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+#    Makefile for src/backend/replication/pgoutput
+#
+# IDENTIFICATION
+#    src/backend/replication/pgoutput
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/replication/pgoutput
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -I$(srcdir) $(CPPFLAGS)
+
+OBJS = pgoutput.o $(WIN32RES)
+PGFILEDESC = "pgoutput - standard logical replication output plugin"
+NAME = pgoutput
+
+all: all-shared-lib
+
+include $(top_srcdir)/src/Makefile.shlib
+
+install: all installdirs install-lib
+
+installdirs: installdirs-lib
+
+uninstall: uninstall-lib
+
+clean distclean maintainer-clean: clean-lib
+	rm -f $(OBJS)
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
new file mode 100644
index 0000000000..04dde5d494
--- /dev/null
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -0,0 +1,596 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgoutput.c
+ *		Logical Replication output plugin
+ *
+ * Copyright (c) 2012-2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		  src/backend/replication/pgoutput/pgoutput.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_publication.h"
+
+#include "replication/logical.h"
+#include "replication/logicalproto.h"
+#include "replication/origin.h"
+#include "replication/pgoutput.h"
+
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/int8.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+
+PG_MODULE_MAGIC;
+
+extern void _PG_output_plugin_init(OutputPluginCallbacks *cb);
+
+static void pgoutput_startup(LogicalDecodingContext * ctx,
+				 OutputPluginOptions *opt, bool is_init);
+static void pgoutput_shutdown(LogicalDecodingContext * ctx);
+static void pgoutput_begin_txn(LogicalDecodingContext *ctx,
+				   ReorderBufferTXN *txn);
+static void pgoutput_commit_txn(LogicalDecodingContext *ctx,
+					ReorderBufferTXN *txn, XLogRecPtr commit_lsn);
+static void pgoutput_change(LogicalDecodingContext *ctx,
+				ReorderBufferTXN *txn, Relation rel,
+				ReorderBufferChange *change);
+static bool pgoutput_origin_filter(LogicalDecodingContext *ctx,
+					   RepOriginId origin_id);
+
+/* False whenever the loaded publication list must be reloaded before use. */
+static bool publications_valid;
+
+static List *LoadPublications(List *pubnames);
+static void publication_invalidation_cb(Datum arg, int cacheid,
+							uint32 hashvalue);
+
+/* Entry in the map used to remember which relation schemas we sent. */
+typedef struct RelationSyncEntry
+{
+	Oid relid; /* relation oid */
+	bool schema_sent; /* did we send the schema? */
+	bool replicate_valid; /* false forces pubactions to be rebuilt */
+	PublicationActions pubactions; /* which DML actions are published */
+} RelationSyncEntry;
+
+/* Map used to remember which relation schemas we sent. */
+static HTAB *RelationSyncCache = NULL;
+
+static void init_rel_sync_cache(MemoryContext decoding_context);
+static RelationSyncEntry *get_rel_sync_entry(PGOutputData *data, Oid relid);
+static void rel_sync_cache_relation_cb(Datum arg, Oid relid);
+static void rel_sync_cache_publication_cb(Datum arg, int cacheid,
+							  uint32 hashvalue);
+
+/*
+ * Specify output plugin callbacks
+ */
+void
+_PG_output_plugin_init(OutputPluginCallbacks *cb)
+{
+	AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
+
+	cb->startup_cb = pgoutput_startup;
+	cb->begin_cb = pgoutput_begin_txn;
+	cb->change_cb = pgoutput_change;
+	cb->commit_cb = pgoutput_commit_txn;
+	cb->filter_by_origin_cb = pgoutput_origin_filter;
+	cb->shutdown_cb = pgoutput_shutdown;
+}
+
+/*
+ * Parse the options passed by the client.
+ *
+ * Recognized options are "proto_version" (an integer that must fit in
+ * uint32) and "publication_names" (a comma-separated identifier list).
+ * Giving an option twice, giving a malformed value, or giving any other
+ * option raises an error.  Outputs are left untouched for options that
+ * were not given.
+ */
+static void
+parse_output_parameters(List *options, uint32 *protocol_version,
+						List **publication_names)
+{
+	ListCell *lc;
+	bool protocol_version_given = false;
+	bool publication_names_given = false;
+
+	foreach(lc, options)
+	{
+		DefElem *defel = (DefElem *) lfirst(lc);
+
+		Assert(defel->arg == NULL || IsA(defel->arg, String));
+
+		/* Check each param, whether or not we recognise it */
+		if (strcmp(defel->defname, "proto_version") == 0)
+		{
+			int64 parsed;
+
+			if (protocol_version_given)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			protocol_version_given = true;
+
+			if (!scanint8(strVal(defel->arg), true, &parsed))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid proto_version")));
+
+			if (parsed > PG_UINT32_MAX || parsed < 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("proto_version \"%s\" out of range",
+								strVal(defel->arg))));
+
+			*protocol_version = (uint32) parsed;
+		}
+		else if (strcmp(defel->defname, "publication_names") == 0)
+		{
+			if (publication_names_given)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			publication_names_given = true;
+
+			if (!SplitIdentifierString(strVal(defel->arg), ',',
+									   publication_names))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_NAME),
+						 errmsg("invalid publication_names syntax")));
+		}
+		else
+			elog(ERROR, "unrecognized pgoutput option: %s", defel->defname);
+	}
+}
+
+/*
+ * Initialize this plugin
+ *
+ * Option parsing, protocol version validation and cache setup only happen
+ * when is_init is false, i.e. at replication start rather than at slot
+ * initialization.
+ */
+static void
+pgoutput_startup(LogicalDecodingContext * ctx, OutputPluginOptions *opt,
+				 bool is_init)
+{
+	PGOutputData *data = palloc0(sizeof(PGOutputData));
+
+	/* Create our memory context for private allocations. */
+	data->context = AllocSetContextCreate(ctx->context,
+										  "logical replication output context",
+										  ALLOCSET_DEFAULT_MINSIZE,
+										  ALLOCSET_DEFAULT_INITSIZE,
+										  ALLOCSET_DEFAULT_MAXSIZE);
+
+	ctx->output_plugin_private = data;
+
+	/* This plugin uses binary protocol. */
+	opt->output_type = OUTPUT_PLUGIN_BINARY_OUTPUT;
+
+	/*
+	 * This is replication start and not slot initialization.
+	 *
+	 * Parse and validate options passed by the client.
+	 */
+	if (!is_init)
+	{
+		/* Parse the params and ERROR if we see any we don't recognise */
+		parse_output_parameters(ctx->output_plugin_options,
+								&data->protocol_version,
+								&data->publication_names);
+
+		/*
+		 * Check if we support requested protocol.  Only versions above the
+		 * maximum are rejected here; versions below the minimum are caught
+		 * by the next check, so anything in between is accepted.
+		 */
+		if (data->protocol_version > LOGICALREP_PROTO_VERSION_NUM)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("client sent proto_version=%d but we only support protocol %d or lower",
+							data->protocol_version, LOGICALREP_PROTO_VERSION_NUM)));
+
+		if (data->protocol_version < LOGICALREP_PROTO_MIN_VERSION_NUM)
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("client sent proto_version=%d but we only support protocol %d or higher",
+							data->protocol_version, LOGICALREP_PROTO_MIN_VERSION_NUM)));
+
+		if (list_length(data->publication_names) < 1)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("publication_names parameter missing")));
+
+		/* Init publication state. */
+		data->publications = NIL;
+		publications_valid = false;
+		CacheRegisterSyscacheCallback(PUBLICATIONOID,
+									  publication_invalidation_cb,
+									  (Datum) 0);
+
+		/* Initialize relation schema cache. */
+		init_rel_sync_cache(CacheMemoryContext);
+	}
+}
+
+/*
+ * BEGIN callback
+ *
+ * Writes the BEGIN message and, when the transaction has a replication
+ * origin, a separate ORIGIN message after it.
+ */
+static void
+pgoutput_begin_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn)
+{
+	bool send_replication_origin = txn->origin_id != InvalidRepOriginId;
+
+	OutputPluginPrepareWrite(ctx, !send_replication_origin);
+	logicalrep_write_begin(ctx->out, txn);
+
+	if (send_replication_origin)
+	{
+		char *origin;
+
+		/* Message boundary */
+		OutputPluginWrite(ctx, false);
+		OutputPluginPrepareWrite(ctx, true);
+
+		/*
+		 * XXX: which behaviour we want here?
+		 *
+		 * Alternatives:
+		 *	- don't send origin message if origin name not found
+		 *	  (that's what we do now)
+		 *	- throw error - that will break replication, not good
+		 *	- send some special "unknown" origin
+		 */
+		if (replorigin_by_oid(txn->origin_id, true, &origin))
+			logicalrep_write_origin(ctx->out, origin, txn->origin_lsn);
+	}
+
+	OutputPluginWrite(ctx, true);
+}
+
+/*
+ * COMMIT callback
+ */
+static void
+pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+					XLogRecPtr commit_lsn)
+{
+	OutputPluginPrepareWrite(ctx, true);
+	logicalrep_write_commit(ctx->out, txn, commit_lsn);
+	OutputPluginWrite(ctx, true);
+}
+
+/*
+ * Sends the decoded DML over wire.
+ *
+ * Changes whose action is not published for the relation are dropped.
+ * Before the first change of a relation is sent, the relation's schema
+ * (preceded by type messages for its user-defined column types) is sent.
+ */
+static void
+pgoutput_change(LogicalDecodingContext *ctx, ReorderBufferTXN *txn,
+				Relation relation, ReorderBufferChange *change)
+{
+	PGOutputData *data = (PGOutputData *) ctx->output_plugin_private;
+	MemoryContext old;
+	RelationSyncEntry *relentry;
+
+	relentry = get_rel_sync_entry(data, RelationGetRelid(relation));
+
+	/* First check the table filter */
+	switch (change->action)
+	{
+		case REORDER_BUFFER_CHANGE_INSERT:
+			if (!relentry->pubactions.pubinsert)
+				return;
+			break;
+		case REORDER_BUFFER_CHANGE_UPDATE:
+			if (!relentry->pubactions.pubupdate)
+				return;
+			break;
+		case REORDER_BUFFER_CHANGE_DELETE:
+			if (!relentry->pubactions.pubdelete)
+				return;
+			break;
+		default:
+			Assert(false);
+	}
+
+	/* Avoid leaking memory by using and resetting our own context */
+	old = MemoryContextSwitchTo(data->context);
+
+	/*
+	 * Write the relation schema if the current schema hasn't been sent yet.
+	 */
+	if (!relentry->schema_sent)
+	{
+		TupleDesc desc;
+		int i;
+
+		desc = RelationGetDescr(relation);
+
+		/*
+		 * Write out type info if needed. We do that only for user created
+		 * types.
+		 */
+		for (i = 0; i < desc->natts; i++)
+		{
+			Form_pg_attribute att = desc->attrs[i];
+
+			if (att->attisdropped)
+				continue;
+
+			if (att->atttypid < FirstNormalObjectId)
+				continue;
+
+			OutputPluginPrepareWrite(ctx, false);
+			logicalrep_write_typ(ctx->out, att->atttypid);
+			OutputPluginWrite(ctx, false);
+		}
+
+		OutputPluginPrepareWrite(ctx, false);
+		logicalrep_write_rel(ctx->out, relation);
+		OutputPluginWrite(ctx, false);
+		relentry->schema_sent = true;
+	}
+
+	/* Send the data */
+	switch (change->action)
+	{
+		case REORDER_BUFFER_CHANGE_INSERT:
+			OutputPluginPrepareWrite(ctx, true);
+			logicalrep_write_insert(ctx->out, relation,
+									&change->data.tp.newtuple->tuple);
+			OutputPluginWrite(ctx, true);
+			break;
+		case REORDER_BUFFER_CHANGE_UPDATE:
+			{
+				HeapTuple oldtuple = change->data.tp.oldtuple ?
+					&change->data.tp.oldtuple->tuple : NULL;
+
+				OutputPluginPrepareWrite(ctx, true);
+				logicalrep_write_update(ctx->out, relation, oldtuple,
+										&change->data.tp.newtuple->tuple);
+				OutputPluginWrite(ctx, true);
+				break;
+			}
+		case REORDER_BUFFER_CHANGE_DELETE:
+			if (change->data.tp.oldtuple)
+			{
+				OutputPluginPrepareWrite(ctx, true);
+				logicalrep_write_delete(ctx->out, relation,
+										&change->data.tp.oldtuple->tuple);
+				OutputPluginWrite(ctx, true);
+			}
+			else
+				elog(DEBUG1, "didn't send DELETE change because of missing oldtuple");
+			break;
+		default:
+			Assert(false);
+	}
+
+	/* Cleanup */
+	MemoryContextSwitchTo(old);
+	MemoryContextReset(data->context);
+}
+
+/*
+ * Currently we always forward.
+ */
+static bool
+pgoutput_origin_filter(LogicalDecodingContext *ctx,
+					   RepOriginId origin_id)
+{
+	return false;
+}
+
+/*
+ * Shutdown the output plugin.
+ *
+ * Note, we don't need to clean the data->context as it's child context
+ * of the ctx->context so it will be cleaned up by logical decoding machinery.
+ */ +static void +pgoutput_shutdown(LogicalDecodingContext * ctx) +{ + if (RelationSyncCache) + { + hash_destroy(RelationSyncCache); + RelationSyncCache = NULL; + } +} + +/* + * Load publications from the list of publication names. + */ +static List * +LoadPublications(List *pubnames) +{ + List *result = NIL; + ListCell *lc; + + foreach (lc, pubnames) + { + char *pubname = (char *) lfirst(lc); + Publication *pub = GetPublicationByName(pubname, false); + + result = lappend(result, pub); + } + + return result; +} + +/* + * Publication cache invalidation callback. + */ +static void +publication_invalidation_cb(Datum arg, int cacheid, uint32 hashvalue) +{ + publications_valid = false; + + /* + * Also invalidate per-relation cache so that next time the filtering + * info is checked it will be updated with the new publication + * settings. + */ + rel_sync_cache_publication_cb(arg, cacheid, hashvalue); +} + +/* + * Initialize the relation schema sync cache for a decoding session. + * + * The hash table is destroyed at the end of a decoding session. While + * relcache invalidations still exist and will still be invoked, they + * will just see the null hash table global and take no action. 
+ */ +static void +init_rel_sync_cache(MemoryContext cachectx) +{ + HASHCTL ctl; + MemoryContext old_ctxt; + + if (RelationSyncCache != NULL) + return; + + /* Make a new hash table for the cache */ + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RelationSyncEntry); + ctl.hcxt = cachectx; + + old_ctxt = MemoryContextSwitchTo(cachectx); + RelationSyncCache = hash_create("logical replication output relation cache", + 128, &ctl, + HASH_ELEM | HASH_CONTEXT | HASH_BLOBS); + (void) MemoryContextSwitchTo(old_ctxt); + + Assert(RelationSyncCache != NULL); + + CacheRegisterRelcacheCallback(rel_sync_cache_relation_cb, (Datum) 0); + CacheRegisterSyscacheCallback(PUBLICATIONRELMAP, + rel_sync_cache_publication_cb, + (Datum) 0); +} + +/* + * Find or create entry in the relation schema cache. + */ +static RelationSyncEntry * +get_rel_sync_entry(PGOutputData *data, Oid relid) +{ + RelationSyncEntry *entry; + bool found; + MemoryContext oldctx; + + Assert(RelationSyncCache != NULL); + + /* Find cached relation sync entry, creating if not found */ + oldctx = MemoryContextSwitchTo(CacheMemoryContext); + entry = (RelationSyncEntry *) hash_search(RelationSyncCache, + (void *) &relid, + HASH_ENTER, &found); + MemoryContextSwitchTo(oldctx); + Assert(entry != NULL); + + /* Not found means schema wasn't sent */ + if (!found || !entry->replicate_valid) + { + List *pubids = GetRelationPublications(relid); + ListCell *lc; + + /* Reload publications if needed before use. */ + if (!publications_valid) + { + oldctx = MemoryContextSwitchTo(CacheMemoryContext); + if (data->publications) + list_free_deep(data->publications); + + data->publications = LoadPublications(data->publication_names); + MemoryContextSwitchTo(oldctx); + publications_valid = true; + } + + /* + * Build publication cache. We can't use one provided by relcache + * as relcache considers all publications given relation is in, but + * here we only need to consider ones that the subscriber requested. 
+ */ + entry->pubactions.pubinsert = entry->pubactions.pubupdate = + entry->pubactions.pubdelete = false; + + foreach(lc, data->publications) + { + Publication *pub = lfirst(lc); + + if (pub->alltables || list_member_oid(pubids, pub->oid)) + { + entry->pubactions.pubinsert |= pub->pubactions.pubinsert; + entry->pubactions.pubupdate |= pub->pubactions.pubupdate; + entry->pubactions.pubdelete |= pub->pubactions.pubdelete; + } + + if (entry->pubactions.pubinsert && entry->pubactions.pubupdate && + entry->pubactions.pubdelete) + break; + } + + list_free(pubids); + + entry->replicate_valid = true; + } + + if (!found) + entry->schema_sent = false; + + return entry; +} + +/* + * Relcache invalidation callback + */ +static void +rel_sync_cache_relation_cb(Datum arg, Oid relid) +{ + RelationSyncEntry *entry; + + /* + * We can get here if the plugin was used in SQL interface as the + * RelationSyncCache is destroyed when the decoding finishes, but there + * is no way to unregister the relcache invalidation callback. + */ + if (RelationSyncCache == NULL) + return; + + /* + * Nobody keeps pointers to entries in this hash table around outside + * logical decoding callback calls - but invalidation events can come in + * *during* a callback if we access the relcache in the callback. Because + * of that we must mark the cache entry as invalid but not remove it from + * the hash while it could still be referenced, then prune it at a later + * safe point. + * + * Getting invalidations for relations that aren't in the table is + * entirely normal, since there's no way to unregister for an + * invalidation event. So we don't care if it's found or not. + */ + entry = (RelationSyncEntry *) hash_search(RelationSyncCache, &relid, + HASH_FIND, NULL); + + /* + * Reset schema sent status as the relation definition may have + * changed. 
+ */ + if (entry != NULL) + entry->schema_sent = false; +} + +/* + * Publication relation map syscache invalidation callback + */ +static void +rel_sync_cache_publication_cb(Datum arg, int cacheid, uint32 hashvalue) +{ + HASH_SEQ_STATUS status; + RelationSyncEntry *entry; + + /* + * We can get here if the plugin was used in SQL interface as the + * RelationSyncCache is destroyed when the decoding finishes, but there + * is no way to unregister the syscache invalidation callback. + */ + if (RelationSyncCache == NULL) + return; + + /* + * There is no way to find which entry in our cache the hash belongs to + * so mark the whole cache as invalid. + */ + hash_seq_init(&status, RelationSyncCache); + while ((entry = (RelationSyncEntry *) hash_seq_search(&status)) != NULL) + entry->replicate_valid = false; +} diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index c6b54ec36a..0e4a4b9d19 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -196,6 +196,7 @@ WalReceiverMain(void) WalRcvData *walrcv = WalRcv; TimestampTz last_recv_timestamp; bool ping_sent; + char *err; /* * WalRcv should be set up already (if we are a backend, we inherit this @@ -293,7 +294,10 @@ WalReceiverMain(void) /* Establish the connection to the primary for XLOG streaming */ EnableWalRcvImmediateExit(); - wrconn = walrcv_connect(conninfo, false, "walreceiver"); + wrconn = walrcv_connect(conninfo, false, "walreceiver", &err); + if (!wrconn) + ereport(ERROR, + (errmsg("could not connect to the primary server: %s", err))); DisableWalRcvImmediateExit(); /* @@ -316,13 +320,16 @@ WalReceiverMain(void) { char *primary_sysid; char standby_sysid[32]; + int server_version; + WalRcvStreamOptions options; /* * Check that we're connected to a valid server using the - * IDENTIFY_SYSTEM replication command, + * IDENTIFY_SYSTEM replication command. 
*/ EnableWalRcvImmediateExit(); - primary_sysid = walrcv_identify_system(wrconn, &primaryTLI); + primary_sysid = walrcv_identify_system(wrconn, &primaryTLI, + &server_version); snprintf(standby_sysid, sizeof(standby_sysid), UINT64_FORMAT, GetSystemIdentifier()); @@ -368,9 +375,12 @@ WalReceiverMain(void) * history file, bump recovery target timeline, and ask us to restart * on the new timeline. */ + options.logical = false; + options.startpoint = startpoint; + options.slotname = slotname[0] != '\0' ? slotname : NULL; + options.proto.physical.startpointTLI = startpointTLI; ThisTimeLineID = startpointTLI; - if (walrcv_startstreaming(wrconn, startpointTLI, startpoint, - slotname[0] != '\0' ? slotname : NULL)) + if (walrcv_startstreaming(wrconn, &options)) { if (first_stream) ereport(LOG, diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 00f5ae9e92..2d1ed143e0 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -28,6 +28,7 @@ #include "postmaster/bgworker_internals.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" +#include "replication/logicallauncher.h" #include "replication/slot.h" #include "replication/walreceiver.h" #include "replication/walsender.h" @@ -143,6 +144,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, ReplicationOriginShmemSize()); size = add_size(size, WalSndShmemSize()); size = add_size(size, WalRcvShmemSize()); + size = add_size(size, ApplyLauncherShmemSize()); size = add_size(size, SnapMgrShmemSize()); size = add_size(size, BTreeShmemSize()); size = add_size(size, SyncScanShmemSize()); @@ -258,6 +260,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) ReplicationOriginShmemInit(); WalSndShmemInit(); WalRcvShmemInit(); + ApplyLauncherShmemInit(); /* * Set up other modules that need some shared memory space diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 
dd0418246a..c95ca5b2e1 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -48,3 +48,5 @@ ReplicationOriginLock 40 MultiXactTruncationLock 41 OldSnapshotTimeMapLock 42 BackendRandomLock 43 +LogicalRepLauncherLock 44 +LogicalRepWorkerLock 45 diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 1492101336..0306247177 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -44,9 +44,11 @@ #include "commands/portalcmds.h" #include "commands/prepare.h" #include "commands/proclang.h" +#include "commands/publicationcmds.h" #include "commands/schemacmds.h" #include "commands/seclabel.h" #include "commands/sequence.h" +#include "commands/subscriptioncmds.h" #include "commands/tablecmds.h" #include "commands/tablespace.h" #include "commands/trigger.h" @@ -209,6 +211,11 @@ check_xact_readonly(Node *parsetree) case T_CreateForeignTableStmt: case T_ImportForeignSchemaStmt: case T_SecLabelStmt: + case T_CreatePublicationStmt: + case T_AlterPublicationStmt: + case T_CreateSubscriptionStmt: + case T_AlterSubscriptionStmt: + case T_DropSubscriptionStmt: PreventCommandIfReadOnly(CreateCommandTag(parsetree)); PreventCommandIfParallelMode(CreateCommandTag(parsetree)); break; @@ -1578,6 +1585,33 @@ ProcessUtilitySlow(ParseState *pstate, address = CreateAccessMethod((CreateAmStmt *) parsetree); break; + case T_CreatePublicationStmt: + address = CreatePublication((CreatePublicationStmt *) parsetree); + break; + + case T_AlterPublicationStmt: + AlterPublication((AlterPublicationStmt *) parsetree); + /* + * AlterPublication calls EventTriggerCollectSimpleCommand + * directly + */ + commandCollected = true; + break; + + case T_CreateSubscriptionStmt: + address = CreateSubscription((CreateSubscriptionStmt *) parsetree); + break; + + case T_AlterSubscriptionStmt: + address = AlterSubscription((AlterSubscriptionStmt *) parsetree); + break; + + case T_DropSubscriptionStmt: + 
DropSubscription((DropSubscriptionStmt *) parsetree); + /* no commands stashed for DROP */ + commandCollected = true; + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(parsetree)); @@ -1941,6 +1975,12 @@ AlterObjectTypeCommandTag(ObjectType objtype) case OBJECT_MATVIEW: tag = "ALTER MATERIALIZED VIEW"; break; + case OBJECT_PUBLICATION: + tag = "ALTER PUBLICATION"; + break; + case OBJECT_SUBSCRIPTION: + tag = "ALTER SUBSCRIPTION"; + break; default: tag = "???"; break; @@ -2232,6 +2272,9 @@ CreateCommandTag(Node *parsetree) case OBJECT_ACCESS_METHOD: tag = "DROP ACCESS METHOD"; break; + case OBJECT_PUBLICATION: + tag = "DROP PUBLICATION"; + break; default: tag = "???"; } @@ -2602,6 +2645,26 @@ CreateCommandTag(Node *parsetree) tag = "CREATE ACCESS METHOD"; break; + case T_CreatePublicationStmt: + tag = "CREATE PUBLICATION"; + break; + + case T_AlterPublicationStmt: + tag = "ALTER PUBLICATION"; + break; + + case T_CreateSubscriptionStmt: + tag = "CREATE SUBSCRIPTION"; + break; + + case T_AlterSubscriptionStmt: + tag = "ALTER SUBSCRIPTION"; + break; + + case T_DropSubscriptionStmt: + tag = "DROP SUBSCRIPTION"; + break; + case T_PrepareStmt: tag = "PREPARE"; break; @@ -3166,6 +3229,26 @@ GetCommandLogLevel(Node *parsetree) lev = LOGSTMT_DDL; break; + case T_CreatePublicationStmt: + lev = LOGSTMT_DDL; + break; + + case T_AlterPublicationStmt: + lev = LOGSTMT_DDL; + break; + + case T_CreateSubscriptionStmt: + lev = LOGSTMT_DDL; + break; + + case T_AlterSubscriptionStmt: + lev = LOGSTMT_DDL; + break; + + case T_DropSubscriptionStmt: + lev = LOGSTMT_DDL; + break; + /* already-planned queries */ case T_PlannedStmt: { diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 1f50282803..11f9218f66 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -375,11 +375,16 @@ AddRelcacheInvalidationMessage(InvalidationListHeader *hdr, { SharedInvalidationMessage msg; - /* Don't add a duplicate item 
*/ - /* We assume dbId need not be checked because it will never change */ + /* + * Don't add a duplicate item. + * We assume dbId need not be checked because it will never change. + * InvalidOid for relId means all relations so we don't need to add + * individual ones when it is present. + */ ProcessMessageList(hdr->rclist, if (msg->rc.id == SHAREDINVALRELCACHE_ID && - msg->rc.relId == relId) + (msg->rc.relId == relId || + msg->rc.relId == InvalidOid)) return); /* OK, add the item */ @@ -509,8 +514,10 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId) /* * If the relation being invalidated is one of those cached in the local * relcache init file, mark that we need to zap that file at commit. + * Same is true when we are invalidating whole relcache. */ - if (OidIsValid(dbId) && RelationIdIsInInitFile(relId)) + if (OidIsValid(dbId) && + (RelationIdIsInInitFile(relId) || relId == InvalidOid)) transInvalInfo->RelcacheInitFileInval = true; } @@ -565,7 +572,10 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg) { int i; - RelationCacheInvalidateEntry(msg->rc.relId); + if (msg->rc.relId == InvalidOid) + RelationCacheInvalidate(); + else + RelationCacheInvalidateEntry(msg->rc.relId); for (i = 0; i < relcache_callback_count; i++) { @@ -1226,6 +1236,21 @@ CacheInvalidateRelcache(Relation relation) RegisterRelcacheInvalidation(databaseId, relationId); } +/* + * CacheInvalidateRelcacheAll + * Register invalidation of the whole relcache at the end of command. + * + * This is used by alter publication as changes in publications may affect + * large number of tables. + */ +void +CacheInvalidateRelcacheAll(void) +{ + PrepareInvalidationState(); + + RegisterRelcacheInvalidation(InvalidOid, InvalidOid); +} + /* * CacheInvalidateRelcacheByTuple * As above, but relation is identified by passing its pg_class tuple. 
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 24678fcd48..26ff7e187a 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -53,8 +53,10 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" #include "catalog/pg_type.h" @@ -103,6 +105,7 @@ static const FormData_pg_attribute Desc_pg_authid[Natts_pg_authid] = {Schema_pg_ static const FormData_pg_attribute Desc_pg_auth_members[Natts_pg_auth_members] = {Schema_pg_auth_members}; static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index}; static const FormData_pg_attribute Desc_pg_shseclabel[Natts_pg_shseclabel] = {Schema_pg_shseclabel}; +static const FormData_pg_attribute Desc_pg_subscription[Natts_pg_subscription] = {Schema_pg_subscription}; /* * Hash tables that index the relation cache @@ -2336,7 +2339,10 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc) list_free(relation->rd_indexlist); bms_free(relation->rd_indexattr); bms_free(relation->rd_keyattr); + bms_free(relation->rd_pkattr); bms_free(relation->rd_idattr); + if (relation->rd_pubactions) + pfree(relation->rd_pubactions); if (relation->rd_options) pfree(relation->rd_options); if (relation->rd_indextuple) @@ -3043,6 +3049,7 @@ AtEOXact_cleanup(Relation relation, bool isCommit) list_free(relation->rd_indexlist); relation->rd_indexlist = NIL; relation->rd_oidindex = InvalidOid; + relation->rd_pkindex = InvalidOid; relation->rd_replidindex = InvalidOid; relation->rd_indexvalid = 0; } @@ -3155,6 +3162,7 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit, list_free(relation->rd_indexlist); relation->rd_indexlist = NIL; relation->rd_oidindex = InvalidOid; + relation->rd_pkindex = 
InvalidOid; relation->rd_replidindex = InvalidOid; relation->rd_indexvalid = 0; } @@ -3588,8 +3596,10 @@ RelationCacheInitializePhase2(void) false, Natts_pg_auth_members, Desc_pg_auth_members); formrdesc("pg_shseclabel", SharedSecLabelRelation_Rowtype_Id, true, false, Natts_pg_shseclabel, Desc_pg_shseclabel); + formrdesc("pg_subscription", SubscriptionRelation_Rowtype_Id, true, + true, Natts_pg_subscription, Desc_pg_subscription); -#define NUM_CRITICAL_SHARED_RELS 4 /* fix if you change list above */ +#define NUM_CRITICAL_SHARED_RELS 5 /* fix if you change list above */ } MemoryContextSwitchTo(oldcxt); @@ -4425,6 +4435,7 @@ RelationGetIndexList(Relation relation) oldlist = relation->rd_indexlist; relation->rd_indexlist = list_copy(result); relation->rd_oidindex = oidIndex; + relation->rd_pkindex = pkeyIndex; if (replident == REPLICA_IDENTITY_DEFAULT && OidIsValid(pkeyIndex)) relation->rd_replidindex = pkeyIndex; else if (replident == REPLICA_IDENTITY_INDEX && OidIsValid(candidateIndex)) @@ -4492,7 +4503,7 @@ insert_ordered_oid(List *list, Oid datum) * to ensure that a correct rd_indexattr set has been cached before first * calling RelationSetIndexList; else a subsequent inquiry might cause a * wrong rd_indexattr set to get computed and cached. Likewise, we do not - * touch rd_keyattr or rd_idattr. + * touch rd_keyattr, rd_pkattr or rd_idattr. */ void RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex) @@ -4508,7 +4519,11 @@ RelationSetIndexList(Relation relation, List *indexIds, Oid oidIndex) list_free(relation->rd_indexlist); relation->rd_indexlist = indexIds; relation->rd_oidindex = oidIndex; - /* For the moment, assume the target rel hasn't got a replica index */ + /* + * For the moment, assume the target rel hasn't got a pk or replica + * index. We'll load them on demand in the API that wraps access to them. 
+ */ + relation->rd_pkindex = InvalidOid; relation->rd_replidindex = InvalidOid; relation->rd_indexvalid = 2; /* mark list as forced */ /* Flag relation as needing eoxact cleanup (to reset the list) */ @@ -4543,6 +4558,27 @@ RelationGetOidIndex(Relation relation) return relation->rd_oidindex; } +/* + * RelationGetPrimaryKeyIndex -- get OID of the relation's primary key index + * + * Returns InvalidOid if there is no such index. + */ +Oid +RelationGetPrimaryKeyIndex(Relation relation) +{ + List *ilist; + + if (relation->rd_indexvalid == 0) + { + /* RelationGetIndexList does the heavy lifting. */ + ilist = RelationGetIndexList(relation); + list_free(ilist); + Assert(relation->rd_indexvalid != 0); + } + + return relation->rd_pkindex; +} + /* * RelationGetReplicaIndex -- get OID of the relation's replica identity index * @@ -4722,8 +4758,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) { Bitmapset *indexattrs; /* indexed columns */ Bitmapset *uindexattrs; /* columns in unique indexes */ + Bitmapset *pkindexattrs; /* columns in the primary index */ Bitmapset *idindexattrs; /* columns in the replica identity */ List *indexoidlist; + Oid relpkindex; Oid relreplindex; ListCell *l; MemoryContext oldcxt; @@ -4737,6 +4775,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return bms_copy(relation->rd_indexattr); case INDEX_ATTR_BITMAP_KEY: return bms_copy(relation->rd_keyattr); + case INDEX_ATTR_BITMAP_PRIMARY_KEY: + return bms_copy(relation->rd_pkattr); case INDEX_ATTR_BITMAP_IDENTITY_KEY: return bms_copy(relation->rd_idattr); default: @@ -4758,12 +4798,14 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return NULL; /* - * Copy the rd_replidindex value computed by RelationGetIndexList before - * proceeding. This is needed because a relcache flush could occur inside - * index_open below, resetting the fields managed by RelationGetIndexList. 
- * (The values we're computing will still be valid, assuming that caller - * has a sufficient lock on the relation.) + * Copy the rd_pkindex and rd_replidindex value computed by + * RelationGetIndexList before proceeding. This is needed because a + * relcache flush could occur inside index_open below, resetting the + * fields managed by RelationGetIndexList. (The values we're computing + * will still be valid, assuming that caller has a sufficient lock on + * the relation.) */ + relpkindex = relation->rd_pkindex; relreplindex = relation->rd_replidindex; /* @@ -4778,6 +4820,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) */ indexattrs = NULL; uindexattrs = NULL; + pkindexattrs = NULL; idindexattrs = NULL; foreach(l, indexoidlist) { @@ -4786,6 +4829,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) IndexInfo *indexInfo; int i; bool isKey; /* candidate key */ + bool isPK; /* primary key */ bool isIDKey; /* replica identity index */ indexDesc = index_open(indexOid, AccessShareLock); @@ -4798,6 +4842,9 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) indexInfo->ii_Expressions == NIL && indexInfo->ii_Predicate == NIL; + /* Is this a primary key? */ + isPK = (indexOid == relpkindex); + /* Is this index the configured (or default) replica identity? 
*/ isIDKey = (indexOid == relreplindex); @@ -4815,6 +4862,10 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) uindexattrs = bms_add_member(uindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); + if (isPK) + pkindexattrs = bms_add_member(pkindexattrs, + attrnum - FirstLowInvalidHeapAttributeNumber); + if (isIDKey) idindexattrs = bms_add_member(idindexattrs, attrnum - FirstLowInvalidHeapAttributeNumber); @@ -4837,6 +4888,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) relation->rd_indexattr = NULL; bms_free(relation->rd_keyattr); relation->rd_keyattr = NULL; + bms_free(relation->rd_pkattr); + relation->rd_pkattr = NULL; bms_free(relation->rd_idattr); relation->rd_idattr = NULL; @@ -4849,6 +4902,7 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) */ oldcxt = MemoryContextSwitchTo(CacheMemoryContext); relation->rd_keyattr = bms_copy(uindexattrs); + relation->rd_pkattr = bms_copy(pkindexattrs); relation->rd_idattr = bms_copy(idindexattrs); relation->rd_indexattr = bms_copy(indexattrs); MemoryContextSwitchTo(oldcxt); @@ -4860,6 +4914,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return indexattrs; case INDEX_ATTR_BITMAP_KEY: return uindexattrs; + case INDEX_ATTR_BITMAP_PRIMARY_KEY: + return bms_copy(relation->rd_pkattr); case INDEX_ATTR_BITMAP_IDENTITY_KEY: return idindexattrs; default: @@ -4992,6 +5048,67 @@ RelationGetExclusionInfo(Relation indexRelation, MemoryContextSwitchTo(oldcxt); } +/* + * Get publication actions for the given relation. + */ +struct PublicationActions * +GetRelationPublicationActions(Relation relation) +{ + List *puboids; + ListCell *lc; + MemoryContext oldcxt; + PublicationActions *pubactions = palloc0(sizeof(PublicationActions)); + + if (relation->rd_pubactions) + return memcpy(pubactions, relation->rd_pubactions, + sizeof(PublicationActions)); + + /* Fetch the publication membership info. 
*/ + puboids = GetRelationPublications(RelationGetRelid(relation)); + puboids = list_concat_unique_oid(puboids, GetAllTablesPublications()); + + foreach(lc, puboids) + { + Oid pubid = lfirst_oid(lc); + HeapTuple tup; + Form_pg_publication pubform; + + tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); + + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for publication %u", pubid); + + pubform = (Form_pg_publication) GETSTRUCT(tup); + + pubactions->pubinsert |= pubform->pubinsert; + pubactions->pubupdate |= pubform->pubupdate; + pubactions->pubdelete |= pubform->pubdelete; + + ReleaseSysCache(tup); + + /* + * If we know everything is replicated, there is no point to check + * for other publications. + */ + if (pubactions->pubinsert && pubactions->pubupdate && + pubactions->pubdelete) + break; + } + + if (relation->rd_pubactions) + { + pfree(relation->rd_pubactions); + relation->rd_pubactions = NULL; + } + + /* Now save copy of the actions in the relcache entry. 
*/ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + relation->rd_pubactions = palloc(sizeof(PublicationActions)); + memcpy(relation->rd_pubactions, pubactions, sizeof(PublicationActions)); + MemoryContextSwitchTo(oldcxt); + + return pubactions; +} /* * Routines to support ereport() reports of relation-related errors @@ -5407,10 +5524,13 @@ load_relcache_init_file(bool shared) rel->rd_fkeyvalid = false; rel->rd_indexlist = NIL; rel->rd_oidindex = InvalidOid; + rel->rd_pkindex = InvalidOid; rel->rd_replidindex = InvalidOid; rel->rd_indexattr = NULL; rel->rd_keyattr = NULL; + rel->rd_pkattr = NULL; rel->rd_idattr = NULL; + rel->rd_pubactions = NULL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index d634a3b683..bdfaa0ce75 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -50,6 +50,8 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_publication.h" +#include "catalog/pg_publication_rel.h" #include "catalog/pg_range.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_seclabel.h" @@ -59,6 +61,7 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" #include "catalog/pg_ts_config.h" @@ -658,6 +661,50 @@ static const struct cachedesc cacheinfo[] = { }, 16 }, + {PublicationRelationId, /* PUBLICATIONOID */ + PublicationObjectIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 8 + }, + {PublicationRelationId, /* PUBLICATIONNAME */ + PublicationNameIndexId, + 1, + { + Anum_pg_publication_pubname, + 0, + 0, + 0 + }, + 8 + }, + {PublicationRelRelationId, /* PUBLICATIONREL */ + PublicationRelObjectIndexId, + 1, 
+ { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 64 + }, + {PublicationRelRelationId, /* PUBLICATIONRELMAP */ + PublicationRelMapIndexId, + 2, + { + Anum_pg_publication_rel_prrelid, + Anum_pg_publication_rel_prpubid, + 0, + 0 + }, + 64 + }, {RewriteRelationId, /* RULERELNAME */ RewriteRelRulenameIndexId, 2, @@ -691,6 +738,28 @@ static const struct cachedesc cacheinfo[] = { }, 128 }, + {SubscriptionRelationId, /* SUBSCRIPTIONOID */ + SubscriptionObjectIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, + {SubscriptionRelationId, /* SUBSCRIPTIONNAME */ + SubscriptionNameIndexId, + 2, + { + Anum_pg_subscription_subdbid, + Anum_pg_subscription_subname, + 0, + 0 + }, + 4 + }, {TableSpaceRelationId, /* TABLESPACEOID */ TablespaceOidIndexId, 1, diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 4e2bd4c496..15a09dfd4d 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -59,6 +59,7 @@ #include "postmaster/postmaster.h" #include "postmaster/syslogger.h" #include "postmaster/walwriter.h" +#include "replication/logicallauncher.h" #include "replication/slot.h" #include "replication/syncrep.h" #include "replication/walreceiver.h" @@ -2471,6 +2472,18 @@ static struct config_int ConfigureNamesInt[] = check_max_worker_processes, NULL, NULL }, + { + {"max_logical_replication_workers", + PGC_POSTMASTER, + RESOURCES_ASYNCHRONOUS, + gettext_noop("Maximum number of logical replication worker processes."), + NULL, + }, + &max_logical_replication_workers, + 4, 0, MAX_BACKENDS, + NULL, NULL, NULL + }, + { {"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE, gettext_noop("Automatic log file rotation will occur after N minutes."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 15669b83c7..661b0fa9b6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -163,6 +163,7 @@ #max_worker_processes = 8 
# (change requires restart) #max_parallel_workers_per_gather = 2 # taken from max_worker_processes #max_parallel_workers = 8 # total maximum number of worker_processes +#max_logical_replication_workers = 4 # taken from max_worker_processes #old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate # (change requires restart) #backend_flush_after = 0 # measured in pages, 0 disables diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index d4e36421d2..89530a9f0f 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -292,6 +292,18 @@ getSchemaData(Archive *fout, int *numTablesPtr) write_msg(NULL, "reading partition key information for interesting tables\n"); getTablePartitionKeyInfo(fout, tblinfo, numTables); + if (g_verbose) + write_msg(NULL, "reading publications\n"); + getPublications(fout); + + if (g_verbose) + write_msg(NULL, "reading publication membership\n"); + getPublicationTables(fout, tblinfo, numTables); + + if (g_verbose) + write_msg(NULL, "reading subscriptions\n"); + getSubscriptions(fout); + *numTablesPtr = numTables; return tblinfo; } diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index 7241cdfc44..6480fb8e74 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -119,6 +119,7 @@ typedef struct _restoreOptions bool *idWanted; /* array showing which dump IDs to emit */ int enable_row_security; int sequence_data; /* dump sequence data even in schema-only mode */ + int include_subscriptions; } RestoreOptions; typedef struct _dumpOptions @@ -152,6 +153,8 @@ typedef struct _dumpOptions int outputNoTablespaces; int use_setsessauth; int enable_row_security; + int include_subscriptions; + int no_create_subscription_slots; /* default, if no "inclusion" switches appear, is to dump everything */ bool include_everything; diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index b89bd99e49..929f1b592b 100644 --- 
a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -172,6 +172,7 @@ dumpOptionsFromRestoreOptions(RestoreOptions *ropt) dopt->include_everything = ropt->include_everything; dopt->enable_row_security = ropt->enable_row_security; dopt->sequence_data = ropt->sequence_data; + dopt->include_subscriptions = ropt->include_subscriptions; return dopt; } @@ -3266,6 +3267,8 @@ _getObjectDescription(PQExpBuffer buf, TocEntry *te, ArchiveHandle *AH) strcmp(type, "SCHEMA") == 0 || strcmp(type, "FOREIGN DATA WRAPPER") == 0 || strcmp(type, "SERVER") == 0 || + strcmp(type, "PUBLICATION") == 0 || + strcmp(type, "SUBSCRIPTION") == 0 || strcmp(type, "USER MAPPING") == 0) { /* We already know that search_path was set properly */ @@ -3476,7 +3479,9 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass) strcmp(te->desc, "TEXT SEARCH DICTIONARY") == 0 || strcmp(te->desc, "TEXT SEARCH CONFIGURATION") == 0 || strcmp(te->desc, "FOREIGN DATA WRAPPER") == 0 || - strcmp(te->desc, "SERVER") == 0) + strcmp(te->desc, "SERVER") == 0 || + strcmp(te->desc, "PUBLICATION") == 0 || + strcmp(te->desc, "SUBSCRIPTION") == 0) { PQExpBuffer temp = createPQExpBuffer(); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 883fde1e5a..0bb363957a 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -246,6 +246,9 @@ static void getBlobs(Archive *fout); static void dumpBlob(Archive *fout, BlobInfo *binfo); static int dumpBlobs(Archive *fout, void *arg); static void dumpPolicy(Archive *fout, PolicyInfo *polinfo); +static void dumpPublication(Archive *fout, PublicationInfo *pubinfo); +static void dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo); +static void dumpSubscription(Archive *fout, SubscriptionInfo *subinfo); static void dumpDatabase(Archive *AH); static void dumpEncoding(Archive *AH); static void dumpStdStrings(Archive *AH); @@ -338,6 +341,7 @@ main(int argc, char **argv) 
{"enable-row-security", no_argument, &dopt.enable_row_security, 1}, {"exclude-table-data", required_argument, NULL, 4}, {"if-exists", no_argument, &dopt.if_exists, 1}, + {"include-subscriptions", no_argument, &dopt.include_subscriptions, 1}, {"inserts", no_argument, &dopt.dump_inserts, 1}, {"lock-wait-timeout", required_argument, NULL, 2}, {"no-tablespaces", no_argument, &dopt.outputNoTablespaces, 1}, @@ -348,6 +352,7 @@ main(int argc, char **argv) {"snapshot", required_argument, NULL, 6}, {"strict-names", no_argument, &strict_names, 1}, {"use-set-session-authorization", no_argument, &dopt.use_setsessauth, 1}, + {"no-create-subscription-slots", no_argument, &dopt.no_create_subscription_slots, 1}, {"no-security-labels", no_argument, &dopt.no_security_labels, 1}, {"no-synchronized-snapshots", no_argument, &dopt.no_synchronized_snapshots, 1}, {"no-unlogged-table-data", no_argument, &dopt.no_unlogged_table_data, 1}, @@ -849,6 +854,7 @@ main(int argc, char **argv) ropt->include_everything = dopt.include_everything; ropt->enable_row_security = dopt.enable_row_security; ropt->sequence_data = dopt.sequence_data; + ropt->include_subscriptions = dopt.include_subscriptions; if (compressLevel == -1) ropt->compression = 0; @@ -929,7 +935,10 @@ help(const char *progname) " access to)\n")); printf(_(" --exclude-table-data=TABLE do NOT dump data for the named table(s)\n")); printf(_(" --if-exists use IF EXISTS when dropping objects\n")); + printf(_(" --include-subscriptions dump logical replication subscriptions\n")); printf(_(" --inserts dump data as INSERT commands, rather than COPY\n")); + printf(_(" --no-create-subscription-slots\n" + " do not create replication slots for subscriptions\n")); printf(_(" --no-security-labels do not dump security label assignments\n")); printf(_(" --no-synchronized-snapshots do not use synchronized snapshots in parallel jobs\n")); printf(_(" --no-tablespaces do not dump tablespace assignments\n")); @@ -3311,6 +3320,449 @@ dumpPolicy(Archive 
*fout, PolicyInfo *polinfo) destroyPQExpBuffer(delqry); } +/* + * getPublications + * get information about publications + */ +void +getPublications(Archive *fout) +{ + PQExpBuffer query; + PGresult *res; + PublicationInfo *pubinfo; + int i_tableoid; + int i_oid; + int i_pubname; + int i_rolname; + int i_puballtables; + int i_pubinsert; + int i_pubupdate; + int i_pubdelete; + int i, + ntups; + + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + resetPQExpBuffer(query); + + /* Get the publications. */ + appendPQExpBuffer(query, + "SELECT p.tableoid, p.oid, p.pubname, " + "(%s p.pubowner) AS rolname, " + "p.puballtables, p.pubinsert, p.pubupdate, p.pubdelete " + "FROM pg_catalog.pg_publication p", + username_subquery); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + if (ntups == 0) + { + /* + * There are no publications defined. Clean up and return. + */ + PQclear(res); + return; + } + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_pubname = PQfnumber(res, "pubname"); + i_rolname = PQfnumber(res, "rolname"); + i_puballtables = PQfnumber(res, "puballtables"); + i_pubinsert = PQfnumber(res, "pubinsert"); + i_pubupdate = PQfnumber(res, "pubupdate"); + i_pubdelete = PQfnumber(res, "pubdelete"); + + pubinfo = pg_malloc(ntups * sizeof(PublicationInfo)); + + for (i = 0; i < ntups; i++) + { + pubinfo[i].dobj.objType = DO_PUBLICATION; + pubinfo[i].dobj.catId.tableoid = + atooid(PQgetvalue(res, i, i_tableoid)); + pubinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid)); + AssignDumpId(&pubinfo[i].dobj); + pubinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_pubname)); + pubinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname)); + pubinfo[i].puballtables = + (strcmp(PQgetvalue(res, i, i_puballtables), "t") == 0); + pubinfo[i].pubinsert = + (strcmp(PQgetvalue(res, i, i_pubinsert), "t") == 0); + pubinfo[i].pubupdate = + (strcmp(PQgetvalue(res, i, i_pubupdate), 
"t") == 0); + pubinfo[i].pubdelete = + (strcmp(PQgetvalue(res, i, i_pubdelete), "t") == 0); + + if (strlen(pubinfo[i].rolname) == 0) + write_msg(NULL, "WARNING: owner of publication \"%s\" appears to be invalid\n", + pubinfo[i].dobj.name); + } + PQclear(res); + + destroyPQExpBuffer(query); +} + +/* + * dumpPublication + * dump the definition of the given publication + */ +static void +dumpPublication(Archive *fout, PublicationInfo *pubinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer delq; + PQExpBuffer query; + + if (dopt->dataOnly) + return; + + delq = createPQExpBuffer(); + query = createPQExpBuffer(); + + appendPQExpBuffer(delq, "DROP PUBLICATION %s;\n", + fmtId(pubinfo->dobj.name)); + + appendPQExpBuffer(query, "CREATE PUBLICATION %s", + fmtId(pubinfo->dobj.name)); + + if (pubinfo->puballtables) + appendPQExpBufferStr(query, " FOR ALL TABLES"); + + appendPQExpBufferStr(query, " WITH ("); + if (pubinfo->pubinsert) + appendPQExpBufferStr(query, "PUBLISH INSERT"); + else + appendPQExpBufferStr(query, "NOPUBLISH INSERT"); + + if (pubinfo->pubupdate) + appendPQExpBufferStr(query, ", PUBLISH UPDATE"); + else + appendPQExpBufferStr(query, ", NOPUBLISH UPDATE"); + + if (pubinfo->pubdelete) + appendPQExpBufferStr(query, ", PUBLISH DELETE"); + else + appendPQExpBufferStr(query, ", NOPUBLISH DELETE"); + + appendPQExpBufferStr(query, ");\n"); + + ArchiveEntry(fout, pubinfo->dobj.catId, pubinfo->dobj.dumpId, + pubinfo->dobj.name, + NULL, + NULL, + pubinfo->rolname, false, + "PUBLICATION", SECTION_POST_DATA, + query->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + destroyPQExpBuffer(delq); + destroyPQExpBuffer(query); +} + +/* + * getPublicationTables + * get information about publication membership for dumpable tables. 
+ */ +void +getPublicationTables(Archive *fout, TableInfo tblinfo[], int numTables) +{ + PQExpBuffer query; + PGresult *res; + PublicationRelInfo *pubrinfo; + int i_tableoid; + int i_oid; + int i_pubname; + int i, + j, + ntups; + + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &tblinfo[i]; + + /* Only plain tables can be added to publications. */ + if (tbinfo->relkind != RELKIND_RELATION) + continue; + + /* + * Ignore publication membership of tables whose definitions are + * not to be dumped. + */ + if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) + continue; + + if (g_verbose) + write_msg(NULL, "reading publication membership for table \"%s.%s\"\n", + tbinfo->dobj.namespace->dobj.name, + tbinfo->dobj.name); + + resetPQExpBuffer(query); + + /* Get the publication membership for the table. */ + appendPQExpBuffer(query, + "SELECT pr.tableoid, pr.oid, p.pubname " + "FROM pg_catalog.pg_publication_rel pr," + " pg_catalog.pg_publication p " + "WHERE pr.prrelid = '%u'" + " AND p.oid = pr.prpubid", + tbinfo->dobj.catId.oid); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + if (ntups == 0) + { + /* + * Table is not a member of any publication. Clean up and continue.
+ */ + PQclear(res); + continue; + } + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_pubname = PQfnumber(res, "pubname"); + + pubrinfo = pg_malloc(ntups * sizeof(PublicationRelInfo)); + + for (j = 0; j < ntups; j++) + { + pubrinfo[j].dobj.objType = DO_PUBLICATION_REL; + pubrinfo[j].dobj.catId.tableoid = + atooid(PQgetvalue(res, j, i_tableoid)); + pubrinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); + AssignDumpId(&pubrinfo[j].dobj); + pubrinfo[j].dobj.namespace = tbinfo->dobj.namespace; + pubrinfo[j].pubname = pg_strdup(PQgetvalue(res, j, i_pubname)); + pubrinfo[j].pubtable = tbinfo; + } + PQclear(res); + } + destroyPQExpBuffer(query); +} + +/* + * dumpPublicationTable + * dump the definition of the given publication table mapping + */ +static void +dumpPublicationTable(Archive *fout, PublicationRelInfo *pubrinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = pubrinfo->pubtable; + PQExpBuffer query; + char *tag; + + if (dopt->dataOnly) + return; + + tag = psprintf("%s %s", pubrinfo->pubname, tbinfo->dobj.name); + + query = createPQExpBuffer(); + + appendPQExpBuffer(query, "ALTER PUBLICATION %s ADD TABLE", + fmtId(pubrinfo->pubname)); + appendPQExpBuffer(query, " %s;", + fmtId(tbinfo->dobj.name)); + + /* + * There is no point in creating a drop query, as the drop + * is done by the table drop.
+ */ + ArchiveEntry(fout, pubrinfo->dobj.catId, pubrinfo->dobj.dumpId, + tag, + tbinfo->dobj.namespace->dobj.name, + NULL, + "", false, + "PUBLICATION TABLE", SECTION_POST_DATA, + query->data, "", NULL, + NULL, 0, + NULL, NULL); + + free(tag); + destroyPQExpBuffer(query); +} + + +/* + * getSubscriptions + * get information about subscriptions + */ +void +getSubscriptions(Archive *fout) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer query; + PGresult *res; + SubscriptionInfo *subinfo; + int i_tableoid; + int i_oid; + int i_subname; + int i_rolname; + int i_subenabled; + int i_subconninfo; + int i_subslotname; + int i_subpublications; + int i, + ntups; + + if (!dopt->include_subscriptions || fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + resetPQExpBuffer(query); + + /* Get the subscriptions in current database. */ + appendPQExpBuffer(query, + "SELECT s.tableoid, s.oid, s.subname," + "(%s s.subowner) AS rolname, s.subenabled, " + " s.subconninfo, s.subslotname, s.subpublications " + "FROM pg_catalog.pg_subscription s " + "WHERE s.subdbid = (SELECT oid FROM pg_catalog.pg_database" + " WHERE datname = current_database())", + username_subquery); + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + if (ntups == 0) + { + /* + * There are no subscriptions defined. Clean up and return. 
+ */ + PQclear(res); + return; + } + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_subname = PQfnumber(res, "subname"); + i_rolname = PQfnumber(res, "rolname"); + i_subenabled = PQfnumber(res, "subenabled"); + i_subconninfo = PQfnumber(res, "subconninfo"); + i_subslotname = PQfnumber(res, "subslotname"); + i_subpublications = PQfnumber(res, "subpublications"); + + subinfo = pg_malloc(ntups * sizeof(SubscriptionInfo)); + + for (i = 0; i < ntups; i++) + { + subinfo[i].dobj.objType = DO_SUBSCRIPTION; + subinfo[i].dobj.catId.tableoid = + atooid(PQgetvalue(res, i, i_tableoid)); + subinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, i_oid)); + AssignDumpId(&subinfo[i].dobj); + subinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, i_subname)); + subinfo[i].rolname = pg_strdup(PQgetvalue(res, i, i_rolname)); + subinfo[i].subenabled = + (strcmp(PQgetvalue(res, i, i_subenabled), "t") == 0); + subinfo[i].subconninfo = pg_strdup(PQgetvalue(res, i, i_subconninfo)); + subinfo[i].subslotname = pg_strdup(PQgetvalue(res, i, i_subslotname)); + subinfo[i].subpublications = + pg_strdup(PQgetvalue(res, i, i_subpublications)); + + if (strlen(subinfo[i].rolname) == 0) + write_msg(NULL, "WARNING: owner of subscription \"%s\" appears to be invalid\n", + subinfo[i].dobj.name); + } + PQclear(res); + + destroyPQExpBuffer(query); +} + +/* + * dumpSubscription + * dump the definition of the given subscription + */ +static void +dumpSubscription(Archive *fout, SubscriptionInfo *subinfo) +{ + DumpOptions *dopt = fout->dopt; + PQExpBuffer delq; + PQExpBuffer query; + PQExpBuffer publications; + char **pubnames = NULL; + int npubnames = 0; + int i; + + if (dopt->dataOnly) + return; + + delq = createPQExpBuffer(); + query = createPQExpBuffer(); + + appendPQExpBuffer(delq, "DROP SUBSCRIPTION %s;\n", + fmtId(subinfo->dobj.name)); + + appendPQExpBuffer(query, "CREATE SUBSCRIPTION %s CONNECTION ", + fmtId(subinfo->dobj.name)); + appendStringLiteralAH(query, 
subinfo->subconninfo, fout); + + /* Build list of quoted publications and append them to query. */ + if (!parsePGArray(subinfo->subpublications, &pubnames, &npubnames)) + { + write_msg(NULL, + "WARNING: could not parse subpublications array\n"); + if (pubnames) + free(pubnames); + pubnames = NULL; + npubnames = 0; + } + + publications = createPQExpBuffer(); + for (i = 0; i < npubnames; i++) + { + if (i > 0) + appendPQExpBufferStr(publications, ", "); + + appendPQExpBufferStr(publications, fmtId(pubnames[i])); + } + + appendPQExpBuffer(query, " PUBLICATION %s WITH (", publications->data); + + if (subinfo->subenabled) + appendPQExpBufferStr(query, "ENABLED"); + else + appendPQExpBufferStr(query, "DISABLED"); + + appendPQExpBufferStr(query, ", SLOT NAME = "); + appendStringLiteralAH(query, subinfo->subslotname, fout); + + if (dopt->no_create_subscription_slots) + appendPQExpBufferStr(query, ", NOCREATE SLOT"); + + appendPQExpBufferStr(query, ");\n"); + + ArchiveEntry(fout, subinfo->dobj.catId, subinfo->dobj.dumpId, + subinfo->dobj.name, + NULL, + NULL, + subinfo->rolname, false, + "SUBSCRIPTION", SECTION_POST_DATA, + query->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + destroyPQExpBuffer(publications); + if (pubnames) + free(pubnames); + + destroyPQExpBuffer(delq); + destroyPQExpBuffer(query); +} + static void binary_upgrade_set_type_oids_by_type_oid(Archive *fout, PQExpBuffer upgrade_buffer, @@ -8752,6 +9204,15 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_POLICY: dumpPolicy(fout, (PolicyInfo *) dobj); break; + case DO_PUBLICATION: + dumpPublication(fout, (PublicationInfo *) dobj); + break; + case DO_PUBLICATION_REL: + dumpPublicationTable(fout, (PublicationRelInfo *) dobj); + break; + case DO_SUBSCRIPTION: + dumpSubscription(fout, (SubscriptionInfo *) dobj); + break; case DO_PRE_DATA_BOUNDARY: case DO_POST_DATA_BOUNDARY: /* never dumped, nothing to do */ @@ -16627,6 +17088,9 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, 
case DO_EVENT_TRIGGER: case DO_DEFAULT_ACL: case DO_POLICY: + case DO_PUBLICATION: + case DO_PUBLICATION_REL: + case DO_SUBSCRIPTION: /* Post-data objects: must come after the post-data boundary */ addObjectDependency(dobj, postDataBound->dumpId); break; diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 0c920a3907..77de22fcb8 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -79,7 +79,10 @@ typedef enum DO_POST_DATA_BOUNDARY, DO_EVENT_TRIGGER, DO_REFRESH_MATVIEW, - DO_POLICY + DO_POLICY, + DO_PUBLICATION, + DO_PUBLICATION_REL, + DO_SUBSCRIPTION } DumpableObjectType; /* component types of an object which can be selected for dumping */ @@ -566,6 +569,43 @@ typedef struct _policyInfo char *polwithcheck; } PolicyInfo; +/* + * The PublicationInfo struct is used to represent publications. + */ +typedef struct _PublicationInfo +{ + DumpableObject dobj; + char *rolname; + bool puballtables; + bool pubinsert; + bool pubupdate; + bool pubdelete; +} PublicationInfo; + +/* + * The PublicationRelInfo struct is used to represent publication table + * mapping. + */ +typedef struct _PublicationRelInfo +{ + DumpableObject dobj; + TableInfo *pubtable; + char *pubname; +} PublicationRelInfo; + +/* + * The SubscriptionInfo struct is used to represent subscription. + */ +typedef struct _SubscriptionInfo +{ + DumpableObject dobj; + char *rolname; + bool subenabled; + char *subconninfo; + char *subslotname; + char *subpublications; +} SubscriptionInfo; + /* * We build an array of these with an entry for each object that is an * extension member according to pg_depend. 
@@ -663,5 +703,9 @@ extern void processExtensionTables(Archive *fout, ExtensionInfo extinfo[], extern EventTriggerInfo *getEventTriggers(Archive *fout, int *numEventTriggers); extern void getPolicies(Archive *fout, TableInfo tblinfo[], int numTables); extern void getTablePartitionKeyInfo(Archive *fout, TableInfo *tblinfo, int numTables); +extern void getPublications(Archive *fout); +extern void getPublicationTables(Archive *fout, TableInfo tblinfo[], + int numTables); +extern void getSubscriptions(Archive *fout); #endif /* PG_DUMP_H */ diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index 1db680b950..ea643397ba 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -71,7 +71,10 @@ static const int dbObjectTypePriority[] = 26, /* DO_POST_DATA_BOUNDARY */ 33, /* DO_EVENT_TRIGGER */ 34, /* DO_REFRESH_MATVIEW */ - 35 /* DO_POLICY */ + 35, /* DO_POLICY */ + 36, /* DO_PUBLICATION */ + 37, /* DO_PUBLICATION_REL */ + 38 /* DO_SUBSCRIPTION */ }; static DumpId preDataBoundId; @@ -1397,6 +1400,21 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "POLICY (ID %d OID %u)", obj->dumpId, obj->catId.oid); return; + case DO_PUBLICATION: + snprintf(buf, bufsize, + "PUBLICATION (ID %d OID %u)", + obj->dumpId, obj->catId.oid); + return; + case DO_PUBLICATION_REL: + snprintf(buf, bufsize, + "PUBLICATION TABLE (ID %d OID %u)", + obj->dumpId, obj->catId.oid); + return; + case DO_SUBSCRIPTION: + snprintf(buf, bufsize, + "SUBSCRIPTION (ID %d OID %u)", + obj->dumpId, obj->catId.oid); + return; case DO_PRE_DATA_BOUNDARY: snprintf(buf, bufsize, "PRE-DATA BOUNDARY (ID %d)", diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index 239b0d8ac0..497677494b 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -72,6 +72,7 @@ main(int argc, char **argv) char *inputFileSpec; static int disable_triggers = 0; static int enable_row_security = 0; + static int include_subscriptions 
= 0; static int if_exists = 0; static int no_data_for_failed_tables = 0; static int outputNoTablespaces = 0; @@ -116,6 +117,7 @@ main(int argc, char **argv) {"disable-triggers", no_argument, &disable_triggers, 1}, {"enable-row-security", no_argument, &enable_row_security, 1}, {"if-exists", no_argument, &if_exists, 1}, + {"include-subscriptions", no_argument, &include_subscriptions, 1}, {"no-data-for-failed-tables", no_argument, &no_data_for_failed_tables, 1}, {"no-tablespaces", no_argument, &outputNoTablespaces, 1}, {"role", required_argument, NULL, 2}, @@ -356,6 +358,7 @@ main(int argc, char **argv) opts->disable_triggers = disable_triggers; opts->enable_row_security = enable_row_security; + opts->include_subscriptions = include_subscriptions; opts->noDataForFailedTables = no_data_for_failed_tables; opts->noTablespace = outputNoTablespaces; opts->use_setsessauth = use_setsessauth; diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index b732627c3a..488eec30f5 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -301,7 +301,7 @@ my %tests = ( 'ALTER FUNCTION dump_test.pltestlang_call_handler() OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ \QALTER FUNCTION dump_test.pltestlang_call_handler() \E \QOWNER TO \E @@ -358,7 +358,7 @@ my %tests = ( 'ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER PROCEDURAL LANGUAGE pltestlang OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -382,7 +382,7 @@ my %tests = ( 'ALTER SCHEMA dump_test OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... 
OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER SCHEMA dump_test OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -406,7 +406,7 @@ my %tests = ( 'ALTER SCHEMA dump_test_second_schema OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER SCHEMA dump_test_second_schema OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -524,7 +524,7 @@ my %tests = ( 'ALTER TABLE test_table OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER TABLE test_table OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -577,7 +577,7 @@ my %tests = ( 'ALTER TABLE test_second_table OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER TABLE test_second_table OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -601,7 +601,7 @@ my %tests = ( 'ALTER TABLE test_third_table OWNER TO' => { all_runs => 1, - catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs)', + catch_all => 'ALTER ... OWNER commands (except LARGE OBJECTs and PUBLICATIONs)', regexp => qr/^ALTER TABLE test_third_table OWNER TO .*;/m, like => { binary_upgrade => 1, @@ -623,10 +623,10 @@ my %tests = ( only_dump_test_table => 1, test_schema_plus_blobs => 1, }, }, - # catch-all for ALTER ... OWNER (except LARGE OBJECTs) - 'ALTER ... OWNER commands (except LARGE OBJECTs)' => { + # catch-all for ALTER ... OWNER (except LARGE OBJECTs and PUBLICATIONs) + 'ALTER ... 
OWNER commands (except LARGE OBJECTs and PUBLICATIONs)' => { all_runs => 0, # catch-all - regexp => qr/^ALTER (?!LARGE OBJECT)(.*) OWNER TO .*;/m, + regexp => qr/^ALTER (?!LARGE OBJECT|PUBLICATION)(.*) OWNER TO .*;/m, like => {}, # use more-specific options above unlike => { column_inserts => 1, @@ -2217,6 +2217,62 @@ my %tests = ( pg_dumpall_globals_clean => 1, role => 1, section_pre_data => 1, }, }, + + 'CREATE PUBLICATION pub1' => { + create_order => 50, + create_sql => 'CREATE PUBLICATION pub1;', + regexp => qr/^ + \QCREATE PUBLICATION pub1 WITH (PUBLISH INSERT, PUBLISH UPDATE, PUBLISH DELETE);\E + /xm, + like => { + binary_upgrade => 1, + clean => 1, + clean_if_exists => 1, + createdb => 1, + defaults => 1, + exclude_test_table_data => 1, + exclude_dump_test_schema => 1, + exclude_test_table => 1, + no_privs => 1, + no_owner => 1, + only_dump_test_schema => 1, + only_dump_test_table => 1, + pg_dumpall_dbprivs => 1, + schema_only => 1, + section_post_data => 1, + test_schema_plus_blobs => 1, }, + unlike => { + section_pre_data => 1, + pg_dumpall_globals => 1, + pg_dumpall_globals_clean => 1, }, }, + 'ALTER PUBLICATION pub1 ADD TABLE test_table' => { + create_order => 51, + create_sql => 'ALTER PUBLICATION pub1 ADD TABLE dump_test.test_table;', + regexp => qr/^ + \QALTER PUBLICATION pub1 ADD TABLE test_table;\E + /xm, + like => { + binary_upgrade => 1, + clean => 1, + clean_if_exists => 1, + createdb => 1, + defaults => 1, + exclude_test_table_data => 1, + no_privs => 1, + no_owner => 1, + only_dump_test_schema => 1, + only_dump_test_table => 1, + pg_dumpall_dbprivs => 1, + schema_only => 1, + section_post_data => 1, + test_schema_plus_blobs => 1, }, + unlike => { + section_pre_data => 1, + exclude_dump_test_schema => 1, + exclude_test_table => 1, + pg_dumpall_globals => 1, + pg_dumpall_globals_clean => 1, }, }, + 'CREATE SCHEMA dump_test' => { all_runs => 1, catch_all => 'CREATE ... 
commands', diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 4139b7763f..0c164a339c 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -501,6 +501,22 @@ exec_command(const char *cmd, else success = PSQL_CMD_UNKNOWN; break; + case 'R': + switch (cmd[2]) + { + case 'p': + if (show_verbose) + success = describePublications(pattern); + else + success = listPublications(pattern); + break; + case 's': + success = describeSubscriptions(pattern, show_verbose); + break; + default: + status = PSQL_CMD_UNKNOWN; + } + break; case 'u': success = describeRoles(pattern, show_verbose, show_system); break; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index ce198779f4..c501168d8c 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2387,6 +2387,38 @@ describeOneTableDetails(const char *schemaname, } PQclear(result); } + + /* print any publications */ + if (pset.sversion >= 100000) + { + printfPQExpBuffer(&buf, + "SELECT pub.pubname\n" + " FROM pg_catalog.pg_publication pub\n" + " LEFT JOIN pg_publication_rel pr\n" + " ON (pr.prpubid = pub.oid)\n" + "WHERE pr.prrelid = '%s' OR pub.puballtables\n" + "ORDER BY 1;", + oid); + + result = PSQLexec(buf.data); + if (!result) + goto error_return; + else + tuples = PQntuples(result); + + if (tuples > 0) + printTableAddFooter(&cont, _("Publications:")); + + /* Might be an empty set - that's ok */ + for (i = 0; i < tuples; i++) + { + printfPQExpBuffer(&buf, " \"%s\"", + PQgetvalue(result, i, 0)); + + printTableAddFooter(&cont, buf.data); + } + PQclear(result); + } } if (view_def) @@ -4846,6 +4878,266 @@ listOneExtensionContents(const char *extname, const char *oid) return true; } +/* \dRp + * Lists publications. 
+ * + * Takes an optional regexp to select particular publications + */ +bool +listPublications(const char *pattern) +{ + PQExpBufferData buf; + PGresult *res; + printQueryOpt myopt = pset.popt; + static const bool translate_columns[] = {false, false, false, false, false}; + + if (pset.sversion < 100000) + { + char sverbuf[32]; + psql_error("The server (version %s) does not support publications.\n", + formatPGVersionNumber(pset.sversion, false, + sverbuf, sizeof(sverbuf))); + return true; + } + + initPQExpBuffer(&buf); + + printfPQExpBuffer(&buf, + "SELECT pubname AS \"%s\",\n" + " pg_catalog.pg_get_userbyid(pubowner) AS \"%s\",\n" + " pubinsert AS \"%s\",\n" + " pubupdate AS \"%s\",\n" + " pubdelete AS \"%s\"\n", + gettext_noop("Name"), + gettext_noop("Owner"), + gettext_noop("Inserts"), + gettext_noop("Updates"), + gettext_noop("Deletes")); + + appendPQExpBufferStr(&buf, + "\nFROM pg_catalog.pg_publication\n"); + + processSQLNamePattern(pset.db, &buf, pattern, false, false, + NULL, "pubname", NULL, + NULL); + + appendPQExpBufferStr(&buf, "ORDER BY 1;"); + + res = PSQLexec(buf.data); + termPQExpBuffer(&buf); + if (!res) + return false; + + myopt.nullPrint = NULL; + myopt.title = _("List of publications"); + myopt.translate_header = true; + myopt.translate_columns = translate_columns; + myopt.n_translate_columns = lengthof(translate_columns); + + printQuery(res, &myopt, pset.queryFout, false, pset.logfile); + + PQclear(res); + + return true; +} + +/* \dRp+ + * Describes publications including the contents. 
+ * + * Takes an optional regexp to select particular publications + */ +bool +describePublications(const char *pattern) +{ + PQExpBufferData buf; + int i; + PGresult *res; + + if (pset.sversion < 100000) + { + char sverbuf[32]; + psql_error("The server (version %s) does not support publications.\n", + formatPGVersionNumber(pset.sversion, false, + sverbuf, sizeof(sverbuf))); + return true; + } + + initPQExpBuffer(&buf); + + printfPQExpBuffer(&buf, + "SELECT oid, pubname, puballtables, pubinsert,\n" + " pubupdate, pubdelete\n" + "FROM pg_catalog.pg_publication\n"); + + processSQLNamePattern(pset.db, &buf, pattern, false, false, + NULL, "pubname", NULL, + NULL); + + appendPQExpBufferStr(&buf, "ORDER BY 2;"); + + res = PSQLexec(buf.data); + if (!res) + { + termPQExpBuffer(&buf); + return false; + } + + for (i = 0; i < PQntuples(res); i++) + { + const char align = 'l'; + int ncols = 3; + int nrows = 1; + int tables = 0; + PGresult *tabres; + char *pubid = PQgetvalue(res, i, 0); + char *pubname = PQgetvalue(res, i, 1); + bool puballtables = strcmp(PQgetvalue(res, i, 2), "t") == 0; + int j; + PQExpBufferData title; + printTableOpt myopt = pset.popt.topt; + printTableContent cont; + + initPQExpBuffer(&title); + printfPQExpBuffer(&title, _("Publication %s"), pubname); + printTableInit(&cont, &myopt, title.data, ncols, nrows); + + printTableAddHeader(&cont, gettext_noop("Inserts"), true, align); + printTableAddHeader(&cont, gettext_noop("Updates"), true, align); + printTableAddHeader(&cont, gettext_noop("Deletes"), true, align); + + printTableAddCell(&cont, PQgetvalue(res, i, 3), false, false); + printTableAddCell(&cont, PQgetvalue(res, i, 4), false, false); + printTableAddCell(&cont, PQgetvalue(res, i, 5), false, false); + + if (puballtables) + printfPQExpBuffer(&buf, + "SELECT n.nspname, c.relname\n" + "FROM pg_catalog.pg_class c,\n" + " pg_catalog.pg_namespace n\n" + "WHERE c.relnamespace = n.oid\n" + " AND c.relkind = 'r'\n" + " AND n.nspname <> 'pg_catalog'\n" + " AND 
n.nspname <> 'information_schema'\n" + "ORDER BY 1,2"); + else + printfPQExpBuffer(&buf, + "SELECT n.nspname, c.relname\n" + "FROM pg_catalog.pg_class c,\n" + " pg_catalog.pg_namespace n,\n" + " pg_catalog.pg_publication_rel pr\n" + "WHERE c.relnamespace = n.oid\n" + " AND c.oid = pr.prrelid\n" + " AND pr.prpubid = '%s'\n" + "ORDER BY 1,2", pubid); + + tabres = PSQLexec(buf.data); + if (!tabres) + { + printTableCleanup(&cont); + PQclear(res); + termPQExpBuffer(&buf); + termPQExpBuffer(&title); + return false; + } + else + tables = PQntuples(tabres); + + if (tables > 0) + printTableAddFooter(&cont, _("Tables:")); + + for (j = 0; j < tables; j++) + { + printfPQExpBuffer(&buf, " \"%s.%s\"", + PQgetvalue(tabres, j, 0), + PQgetvalue(tabres, j, 1)); + + printTableAddFooter(&cont, buf.data); + } + PQclear(tabres); + + printTable(&cont, pset.queryFout, false, pset.logfile); + printTableCleanup(&cont); + + termPQExpBuffer(&title); + } + + termPQExpBuffer(&buf); + PQclear(res); + + return true; +} + +/* \dRs + * Describes subscriptions. 
+ * + * Takes an optional regexp to select particular subscriptions + */ +bool +describeSubscriptions(const char *pattern, bool verbose) +{ + PQExpBufferData buf; + PGresult *res; + printQueryOpt myopt = pset.popt; + static const bool translate_columns[] = {false, false, false, false, false}; + + if (pset.sversion < 100000) + { + char sverbuf[32]; + psql_error("The server (version %s) does not support subscriptions.\n", + formatPGVersionNumber(pset.sversion, false, + sverbuf, sizeof(sverbuf))); + return true; + } + + initPQExpBuffer(&buf); + + printfPQExpBuffer(&buf, + "SELECT subname AS \"%s\"\n" + ", pg_catalog.pg_get_userbyid(subowner) AS \"%s\"\n" + ", subenabled AS \"%s\"\n" + ", subpublications AS \"%s\"\n", + gettext_noop("Name"), + gettext_noop("Owner"), + gettext_noop("Enabled"), + gettext_noop("Publication")); + + if (verbose) + { + appendPQExpBuffer(&buf, + ", subconninfo AS \"%s\"\n", + gettext_noop("Conninfo")); + } + + /* Only display subscriptions in the current database. */ + appendPQExpBufferStr(&buf, + "FROM pg_catalog.pg_subscription\n" + "WHERE subdbid = (SELECT oid\n" + " FROM pg_catalog.pg_database\n" + " WHERE datname = current_database())"); + + processSQLNamePattern(pset.db, &buf, pattern, true, false, + NULL, "subname", NULL, + NULL); + + appendPQExpBufferStr(&buf, "ORDER BY 1;"); + + res = PSQLexec(buf.data); + termPQExpBuffer(&buf); + if (!res) + return false; + + myopt.nullPrint = NULL; + myopt.title = _("List of subscriptions"); + myopt.translate_header = true; + myopt.translate_columns = translate_columns; + myopt.n_translate_columns = lengthof(translate_columns); + + printQuery(res, &myopt, pset.queryFout, false, pset.logfile); + + PQclear(res); + return true; +} + /* * printACLColumn * diff --git a/src/bin/psql/describe.h b/src/bin/psql/describe.h index 4600182e4c..074553e133 100644 --- a/src/bin/psql/describe.h +++ b/src/bin/psql/describe.h @@ -102,4 +102,13 @@ extern bool listExtensionContents(const char *pattern); /* \dy */ extern
bool listEventTriggers(const char *pattern, bool verbose); +/* \dRp */ +bool listPublications(const char *pattern); + +/* \dRp+ */ +bool describePublications(const char *pattern); + +/* \dRs */ +bool describeSubscriptions(const char *pattern, bool verbose); + #endif /* DESCRIBE_H */ diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index 09baf871dd..53656294da 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -241,6 +241,8 @@ slashUsage(unsigned short int pager) fprintf(output, _(" \\dO[S+] [PATTERN] list collations\n")); fprintf(output, _(" \\dp [PATTERN] list table, view, and sequence access privileges\n")); fprintf(output, _(" \\drds [PATRN1 [PATRN2]] list per-database role settings\n")); + fprintf(output, _(" \\dRp[+] [PATTERN] list replication publications\n")); + fprintf(output, _(" \\dRs[+] [PATTERN] list replication subscriptions\n")); fprintf(output, _(" \\ds[S+] [PATTERN] list sequences\n")); fprintf(output, _(" \\dt[S+] [PATTERN] list tables\n")); fprintf(output, _(" \\dT[S+] [PATTERN] list data types\n")); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 7709112f49..d6fffcf42f 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -960,11 +960,13 @@ static const pgsql_thing_t words_after_create[] = { {"OWNED", NULL, NULL, THING_NO_CREATE}, /* for DROP OWNED BY ... */ {"PARSER", Query_for_list_of_ts_parsers, NULL, THING_NO_SHOW}, {"POLICY", NULL, NULL}, + {"PUBLICATION", NULL, NULL}, {"ROLE", Query_for_list_of_roles}, {"RULE", "SELECT pg_catalog.quote_ident(rulename) FROM pg_catalog.pg_rules WHERE substring(pg_catalog.quote_ident(rulename),1,%d)='%s'"}, {"SCHEMA", Query_for_list_of_schemas}, {"SEQUENCE", NULL, &Query_for_list_of_sequences}, {"SERVER", Query_for_list_of_servers}, + {"SUBSCRIPTION", NULL, NULL}, {"TABLE", NULL, &Query_for_list_of_tables}, {"TABLESPACE", Query_for_list_of_tablespaces}, {"TEMP", NULL, NULL, THING_NO_DROP}, /* for CREATE TEMP TABLE ... 
*/ @@ -1407,8 +1409,8 @@ psql_completion(const char *text, int start, int end) {"AGGREGATE", "COLLATION", "CONVERSION", "DATABASE", "DEFAULT PRIVILEGES", "DOMAIN", "EVENT TRIGGER", "EXTENSION", "FOREIGN DATA WRAPPER", "FOREIGN TABLE", "FUNCTION", "GROUP", "INDEX", "LANGUAGE", "LARGE OBJECT", "MATERIALIZED VIEW", "OPERATOR", - "POLICY", "ROLE", "RULE", "SCHEMA", "SERVER", "SEQUENCE", "SYSTEM", "TABLE", - "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE", + "POLICY", "PUBLICATION", "ROLE", "RULE", "SCHEMA", "SERVER", "SEQUENCE", + "SUBSCRIPTION", "SYSTEM", "TABLE", "TABLESPACE", "TEXT SEARCH", "TRIGGER", "TYPE", "USER", "USER MAPPING FOR", "VIEW", NULL}; COMPLETE_WITH_LIST(list_ALTER); @@ -1433,7 +1435,26 @@ psql_completion(const char *text, int start, int end) else COMPLETE_WITH_FUNCTION_ARG(prev2_wd); } - + /* ALTER PUBLICATION ...*/ + else if (Matches3("ALTER","PUBLICATION",MatchAny)) + { + COMPLETE_WITH_LIST5("WITH", "ADD TABLE", "SET TABLE", "DROP TABLE", "OWNER TO"); + } + /* ALTER PUBLICATION .. WITH ( ... */ + else if (HeadMatches3("ALTER", "PUBLICATION",MatchAny) && TailMatches2("WITH", "(")) + { + COMPLETE_WITH_LIST6("PUBLISH INSERT", "NOPUBLISH INSERT", "PUBLISH UPDATE", + "NOPUBLISH UPDATE", "PUBLISH DELETE", "NOPUBLISH DELETE"); + } + /* ALTER SUBSCRIPTION ... 
*/ + else if (Matches3("ALTER","SUBSCRIPTION",MatchAny)) + { + COMPLETE_WITH_LIST6("WITH", "CONNECTION", "SET PUBLICATION", "ENABLE", "DISABLE", "OWNER TO"); + } + else if (HeadMatches3("ALTER", "SUBSCRIPTION", MatchAny) && TailMatches2("WITH", "(")) + { + COMPLETE_WITH_CONST("SLOT NAME"); + } /* ALTER SCHEMA */ else if (Matches3("ALTER", "SCHEMA", MatchAny)) COMPLETE_WITH_LIST2("OWNER TO", "RENAME TO"); @@ -2227,6 +2248,20 @@ psql_completion(const char *text, int start, int end) COMPLETE_WITH_CONST("("); +/* CREATE PUBLICATION */ + else if (Matches3("CREATE", "PUBLICATION", MatchAny)) + COMPLETE_WITH_LIST3("FOR TABLE", "FOR ALL TABLES", "WITH ("); + else if (Matches4("CREATE", "PUBLICATION", MatchAny, "FOR")) + COMPLETE_WITH_LIST2("TABLE", "ALL TABLES"); + /* Complete "CREATE PUBLICATION name FOR TABLE" with table names; FOR and TABLE are separate input words, so each needs its own pattern */
+ else if (Matches5("CREATE", "PUBLICATION", MatchAny, "FOR", "TABLE")) + COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_tables, NULL); + /* Complete "CREATE PUBLICATION [...] WITH" */ + else if (HeadMatches2("CREATE", "PUBLICATION") && TailMatches2("WITH", "(")) + COMPLETE_WITH_LIST2("PUBLISH", "NOPUBLISH"); + else if (HeadMatches2("CREATE", "PUBLICATION") && TailMatches3("WITH", "(", MatchAny)) + COMPLETE_WITH_LIST3("INSERT", "UPDATE", "DELETE"); + /* CREATE RULE */ /* Complete "CREATE RULE " with "AS ON" */ else if (Matches3("CREATE", "RULE", MatchAny)) @@ -2278,6 +2313,16 @@ psql_completion(const char *text, int start, int end) else if (Matches5("CREATE", "TEXT", "SEARCH", "CONFIGURATION", MatchAny)) COMPLETE_WITH_CONST("("); +/* CREATE SUBSCRIPTION */ + else if (Matches3("CREATE", "SUBSCRIPTION", MatchAny)) + COMPLETE_WITH_CONST("CONNECTION"); + else if (Matches5("CREATE", "SUBSCRIPTION", MatchAny, "CONNECTION",MatchAny)) + COMPLETE_WITH_CONST("PUBLICATION"); + /* Complete "CREATE SUBSCRIPTION ... 
WITH ( " */ + else if (HeadMatches2("CREATE", "SUBSCRIPTION") && TailMatches2("WITH", "(")) + COMPLETE_WITH_LIST5("ENABLED", "DISABLED", "CREATE SLOT", + "NOCREATE SLOT", "SLOT NAME"); + /* CREATE TRIGGER --- is allowed inside CREATE SCHEMA, so use TailMatches */ /* complete CREATE TRIGGER with BEFORE,AFTER,INSTEAD OF */ else if (TailMatches3("CREATE", "TRIGGER", MatchAny)) @@ -2438,7 +2483,7 @@ psql_completion(const char *text, int start, int end) /* DROP */ /* Complete DROP object with CASCADE / RESTRICT */ else if (Matches3("DROP", - "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW", + "COLLATION|CONVERSION|DOMAIN|EXTENSION|LANGUAGE|PUBLICATION|SCHEMA|SEQUENCE|SERVER|TABLE|TYPE|VIEW", MatchAny) || Matches4("DROP", "ACCESS", "METHOD", MatchAny) || (Matches4("DROP", "AGGREGATE|FUNCTION", MatchAny, MatchAny) && diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index ee7957418c..10759c7c58 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -161,6 +161,9 @@ typedef enum ObjectClass OCLASS_EXTENSION, /* pg_extension */ OCLASS_EVENT_TRIGGER, /* pg_event_trigger */ OCLASS_POLICY, /* pg_policy */ + OCLASS_PUBLICATION, /* pg_publication */ + OCLASS_PUBLICATION_REL, /* pg_publication_rel */ + OCLASS_SUBSCRIPTION, /* pg_subscription */ OCLASS_TRANSFORM /* pg_transform */ } ObjectClass; diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 674382f909..45605a0dfd 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -325,6 +325,24 @@ DECLARE_UNIQUE_INDEX(pg_replication_origin_roname_index, 6002, on pg_replication DECLARE_UNIQUE_INDEX(pg_partitioned_table_partrelid_index, 3351, on pg_partitioned_table using btree(partrelid oid_ops)); #define PartitionedRelidIndexId 3351 +DECLARE_UNIQUE_INDEX(pg_publication_oid_index, 6110, on pg_publication using btree(oid oid_ops)); +#define PublicationObjectIndexId 6110 + 
+DECLARE_UNIQUE_INDEX(pg_publication_pubname_index, 6111, on pg_publication using btree(pubname name_ops)); +#define PublicationNameIndexId 6111 + +DECLARE_UNIQUE_INDEX(pg_publication_rel_oid_index, 6112, on pg_publication_rel using btree(oid oid_ops)); +#define PublicationRelObjectIndexId 6112 + +DECLARE_UNIQUE_INDEX(pg_publication_rel_map_index, 6113, on pg_publication_rel using btree(prrelid oid_ops, prpubid oid_ops)); +#define PublicationRelMapIndexId 6113 + +DECLARE_UNIQUE_INDEX(pg_subscription_oid_index, 6114, on pg_subscription using btree(oid oid_ops)); +#define SubscriptionObjectIndexId 6114 + +DECLARE_UNIQUE_INDEX(pg_subscription_subname_index, 6115, on pg_subscription using btree(subdbid oid_ops, subname name_ops)); +#define SubscriptionNameIndexId 6115 + /* last step of initialization script: build the indexes declared above */ BUILD_INDICES diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 03f55a1cc5..ab12761643 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2776,6 +2776,8 @@ DATA(insert OID = 3099 ( pg_stat_get_wal_senders PGNSP PGUID 12 1 10 0 0 f f f DESCR("statistics: information about currently active replication"); DATA(insert OID = 3317 ( pg_stat_get_wal_receiver PGNSP PGUID 12 1 0 0 0 f f f f f f s r 0 0 2249 "" "{23,25,3220,23,3220,23,1184,1184,3220,1184,25,25}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{pid,status,receive_start_lsn,receive_start_tli,received_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,conninfo}" _null_ _null_ pg_stat_get_wal_receiver _null_ _null_ _null_ )); DESCR("statistics: information about WAL receiver"); +DATA(insert OID = 6118 ( pg_stat_get_subscription PGNSP PGUID 12 1 0 0 0 f f f f f f s r 1 0 2249 "26" "{26,26,23,3220,1184,1184,3220,1184}" "{i,o,o,o,o,o,o,o}" "{subid,subid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}" _null_ _null_ pg_stat_get_subscription _null_ 
_null_ _null_ )); +DESCR("statistics: information about subscription"); DATA(insert OID = 2026 ( pg_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 0 0 23 "" _null_ _null_ _null_ _null_ _null_ pg_backend_pid _null_ _null_ _null_ )); DESCR("statistics: current backend PID"); DATA(insert OID = 1937 ( pg_stat_get_backend_pid PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 23 "23" _null_ _null_ _null_ _null_ _null_ pg_stat_get_backend_pid _null_ _null_ _null_ )); @@ -5327,6 +5329,10 @@ DESCR("get an individual replication origin's replication progress"); DATA(insert OID = 6014 ( pg_show_replication_origin_status PGNSP PGUID 12 1 100 0 0 f f f f f t v r 0 0 2249 "" "{26,25,3220,3220}" "{o,o,o,o}" "{local_id, external_id, remote_lsn, local_lsn}" _null_ _null_ pg_show_replication_origin_status _null_ _null_ _null_ )); DESCR("get progress for all replication origins"); +/* publications */ +DATA(insert OID = 6119 ( pg_get_publication_tables PGNSP PGUID 12 1 1000 0 0 f f t f t t s s 1 0 26 "25" "{25,26}" "{i,o}" "{pubname,relid}" _null_ _null_ pg_get_publication_tables _null_ _null_ _null_ )); +DESCR("get OIDs of tables in a publication"); + /* rls */ DATA(insert OID = 3298 ( row_security_active PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ row_security_active _null_ _null_ _null_ )); DESCR("row security for current context active on table by table oid"); diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h new file mode 100644 index 0000000000..f157d0f407 --- /dev/null +++ b/src/include/catalog/pg_publication.h @@ -0,0 +1,104 @@ +/*------------------------------------------------------------------------- + * + * pg_publication.h + * definition of the relation sets relation (pg_publication) + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_publication.h + * + * NOTES + * 
the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PUBLICATION_H +#define PG_PUBLICATION_H + +#include "catalog/genbki.h" +#include "catalog/objectaddress.h" + +/* ---------------- + * pg_publication definition. cpp turns this into + * typedef struct FormData_pg_publication + * + * ---------------- + */ +#define PublicationRelationId 6104 + +CATALOG(pg_publication,6104) +{ + NameData pubname; /* name of the publication */ + + Oid pubowner; /* publication owner */ + + /* + * indicates that this is special publication which should encompass + * all tables in the database (except for the unlogged and temp ones) + */ + bool puballtables; + + /* true if inserts are published */ + bool pubinsert; + + /* true if updates are published */ + bool pubupdate; + + /* true if deletes are published */ + bool pubdelete; + +} FormData_pg_publication; + +/* ---------------- + * Form_pg_publication corresponds to a pointer to a tuple with + * the format of pg_publication relation. 
+ * ---------------- + */ +typedef FormData_pg_publication *Form_pg_publication; + +/* ---------------- + * compiler constants for pg_publication + * ---------------- + */ + +#define Natts_pg_publication 6 +#define Anum_pg_publication_pubname 1 +#define Anum_pg_publication_pubowner 2 +#define Anum_pg_publication_puballtables 3 +#define Anum_pg_publication_pubinsert 4 +#define Anum_pg_publication_pubupdate 5 +#define Anum_pg_publication_pubdelete 6 + +typedef struct PublicationActions +{ + bool pubinsert; + bool pubupdate; + bool pubdelete; +} PublicationActions; + +typedef struct Publication +{ + Oid oid; + char *name; + bool alltables; + PublicationActions pubactions; +} Publication; + +extern Publication *GetPublication(Oid pubid); +extern Publication *GetPublicationByName(const char *pubname, bool missing_ok); +extern List *GetRelationPublications(Oid relid); +extern List *GetPublicationRelations(Oid pubid); +extern List *GetAllTablesPublications(void); +extern List *GetAllTablesPublicationRelations(void); + +extern ObjectAddress publication_add_relation(Oid pubid, Relation targetrel, + bool if_not_exists); + +extern Oid get_publication_oid(const char *pubname, bool missing_ok); +extern char *get_publication_name(Oid pubid); + +extern Datum pg_get_publication_tables(PG_FUNCTION_ARGS); + +#endif /* PG_PUBLICATION_H */ diff --git a/src/include/catalog/pg_publication_rel.h b/src/include/catalog/pg_publication_rel.h new file mode 100644 index 0000000000..ac09e29669 --- /dev/null +++ b/src/include/catalog/pg_publication_rel.h @@ -0,0 +1,52 @@ +/*------------------------------------------------------------------------- + * + * pg_publication_rel.h + * definition of the publication to relation map (pg_publication_rel) + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_publication_rel.h + * + * NOTES + * the genbki.pl script reads this file 
and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_PUBLICATION_REL_H +#define PG_PUBLICATION_REL_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_publication_rel definition. cpp turns this into + * typedef struct FormData_pg_publication_rel + * + * ---------------- + */ +#define PublicationRelRelationId 6106 + +CATALOG(pg_publication_rel,6106) +{ + Oid prpubid; /* Oid of the publication */ + Oid prrelid; /* Oid of the relation */ +} FormData_pg_publication_rel; + +/* ---------------- + * Form_pg_publication_rel corresponds to a pointer to a tuple with + * the format of pg_publication_rel relation. + * ---------------- + */ +typedef FormData_pg_publication_rel *Form_pg_publication_rel; + +/* ---------------- + * compiler constants for pg_publication_rel + * ---------------- + */ + +#define Natts_pg_publication_rel 2 +#define Anum_pg_publication_rel_prpubid 1 +#define Anum_pg_publication_rel_prrelid 2 + +#endif /* PG_PUBLICATION_REL_H */ diff --git a/src/include/catalog/pg_subscription.h b/src/include/catalog/pg_subscription.h new file mode 100644 index 0000000000..0ad7b0e321 --- /dev/null +++ b/src/include/catalog/pg_subscription.h @@ -0,0 +1,83 @@ +/* ------------------------------------------------------------------------- + * + * pg_subscription.h + * Definition of the subscription catalog (pg_subscription). + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * ------------------------------------------------------------------------- + */ +#ifndef PG_SUBSCRIPTION_H +#define PG_SUBSCRIPTION_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_subscription definition. 
cpp turns this into + * typedef struct FormData_pg_subscription + * ---------------- + */ +#define SubscriptionRelationId 6100 +#define SubscriptionRelation_Rowtype_Id 6101 + +/* + * Technically, the subscriptions live inside the database, so a shared catalog + * seems weird, but the replication launcher process needs to access all of + * them to be able to start the workers, so we have to put them in a shared, + * nailed catalog. + */ +CATALOG(pg_subscription,6100) BKI_SHARED_RELATION BKI_ROWTYPE_OID(6101) BKI_SCHEMA_MACRO +{ + Oid subdbid; /* Database the subscription is in. */ + NameData subname; /* Name of the subscription */ + + Oid subowner; /* Owner of the subscription */ + + bool subenabled; /* True if the subscription is enabled + * (the worker should be running) */ + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ + text subconninfo; /* Connection string to the publisher */ + NameData subslotname; /* Slot name on publisher */ + + text subpublications[1]; /* List of publications subscribed to */ +#endif +} FormData_pg_subscription; + +typedef FormData_pg_subscription *Form_pg_subscription; + +/* ---------------- + * compiler constants for pg_subscription + * ---------------- + */ +#define Natts_pg_subscription 7 +#define Anum_pg_subscription_subdbid 1 +#define Anum_pg_subscription_subname 2 +#define Anum_pg_subscription_subowner 3 +#define Anum_pg_subscription_subenabled 4 +#define Anum_pg_subscription_subconninfo 5 +#define Anum_pg_subscription_subslotname 6 +#define Anum_pg_subscription_subpublications 7 + + +typedef struct Subscription +{ + Oid oid; /* Oid of the subscription */ + Oid dbid; /* Oid of the database which the subscription is in */ + char *name; /* Name of the subscription */ + Oid owner; /* Oid of the subscription owner */ + bool enabled; /* Indicates if the subscription is enabled */ + char *conninfo; /* Connection string to the publisher */ + char *slotname; /* Name of the replication slot */ + List *publications; /* List of 
publication names to subscribe to */ +} Subscription; + +extern Subscription *GetSubscription(Oid subid, bool missing_ok); +extern void FreeSubscription(Subscription *sub); +extern Oid get_subscription_oid(const char *subname, bool missing_ok); +extern char *get_subscription_name(Oid subid); + +extern int CountDBSubscriptions(Oid dbid); + +#endif /* PG_SUBSCRIPTION_H */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 6eb4f95fdd..558a91d06f 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -154,5 +154,6 @@ extern int64 defGetInt64(DefElem *def); extern List *defGetQualifiedName(DefElem *def); extern TypeName *defGetTypeName(DefElem *def); extern int defGetTypeLength(DefElem *def); +extern List *defGetStringList(DefElem *def); #endif /* DEFREM_H */ diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h new file mode 100644 index 0000000000..2307cea070 --- /dev/null +++ b/src/include/commands/publicationcmds.h @@ -0,0 +1,28 @@ +/*------------------------------------------------------------------------- + * + * publicationcmds.h + * prototypes for publicationcmds.c. 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/commands/publicationcmds.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PUBLICATIONCMDS_H +#define PUBLICATIONCMDS_H + +#include "nodes/parsenodes.h" + +extern ObjectAddress CreatePublication(CreatePublicationStmt *stmt); +extern void AlterPublication(AlterPublicationStmt *stmt); +extern void RemovePublicationById(Oid pubid); +extern void RemovePublicationRelById(Oid proid); + +extern ObjectAddress AlterPublicationOwner(const char *name, Oid newOwnerId); +extern void AlterPublicationOwner_oid(Oid pubid, Oid newOwnerId); + +#endif /* PUBLICATIONCMDS_H */ diff --git a/src/include/commands/subscriptioncmds.h b/src/include/commands/subscriptioncmds.h new file mode 100644 index 0000000000..1d8e2aa412 --- /dev/null +++ b/src/include/commands/subscriptioncmds.h @@ -0,0 +1,27 @@ +/*------------------------------------------------------------------------- + * + * subscriptioncmds.h + * prototypes for subscriptioncmds.c. 
+ * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/commands/subscriptioncmds.h + * + *------------------------------------------------------------------------- + */ + +#ifndef SUBSCRIPTIONCMDS_H +#define SUBSCRIPTIONCMDS_H + +#include "nodes/parsenodes.h" + +extern ObjectAddress CreateSubscription(CreateSubscriptionStmt *stmt); +extern ObjectAddress AlterSubscription(AlterSubscriptionStmt *stmt); +extern void DropSubscription(DropSubscriptionStmt *stmt); + +extern ObjectAddress AlterSubscriptionOwner(const char *name, Oid newOwnerId); +extern void AlterSubscriptionOwner_oid(Oid subid, Oid newOwnerId); + +#endif /* SUBSCRIPTIONCMDS_H */ diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 74aa63536b..02dbe7b228 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -392,5 +392,22 @@ extern void check_exclusion_constraint(Relation heap, Relation index, Datum *values, bool *isnull, EState *estate, bool newIndex); +/* + * prototypes from functions in execReplication.c + */ +extern bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, + LockTupleMode lockmode, + TupleTableSlot *searchslot, + TupleTableSlot *outslot); +extern bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, + TupleTableSlot *searchslot, TupleTableSlot *outslot); + +extern void ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot); +extern void ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate, + TupleTableSlot *searchslot, TupleTableSlot *slot); +extern void ExecSimpleRelationDelete(EState *estate, EPQState *epqstate, + TupleTableSlot *searchslot); +extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd); + #endif /* EXECUTOR_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index d65958153d..fa4932a902 100644 --- a/src/include/nodes/nodes.h +++ 
b/src/include/nodes/nodes.h @@ -411,6 +411,11 @@ typedef enum NodeTag T_CreateTransformStmt, T_CreateAmStmt, T_PartitionCmd, + T_CreatePublicationStmt, + T_AlterPublicationStmt, + T_CreateSubscriptionStmt, + T_AlterSubscriptionStmt, + T_DropSubscriptionStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index edb5cd2152..aad4699f48 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1547,10 +1547,13 @@ typedef enum ObjectType OBJECT_OPERATOR, OBJECT_OPFAMILY, OBJECT_POLICY, + OBJECT_PUBLICATION, + OBJECT_PUBLICATION_REL, OBJECT_ROLE, OBJECT_RULE, OBJECT_SCHEMA, OBJECT_SEQUENCE, + OBJECT_SUBSCRIPTION, OBJECT_TABCONSTRAINT, OBJECT_TABLE, OBJECT_TABLESPACE, @@ -3248,4 +3251,52 @@ typedef struct AlterTSConfigurationStmt bool missing_ok; /* for DROP - skip error if missing? */ } AlterTSConfigurationStmt; + +typedef struct CreatePublicationStmt +{ + NodeTag type; + char *pubname; /* Name of the publication */ + List *options; /* List of DefElem nodes */ + List *tables; /* Optional list of tables to add */ + bool for_all_tables; /* Special publication for all tables in db */ +} CreatePublicationStmt; + +typedef struct AlterPublicationStmt +{ + NodeTag type; + char *pubname; /* Name of the publication */ + + /* parameters used for ALTER PUBLICATION ... WITH */ + List *options; /* List of DefElem nodes */ + + /* parameters used for ALTER PUBLICATION ... 
ADD/DROP TABLE */ + List *tables; /* List of tables to add/drop */ + bool for_all_tables; /* Special publication for all tables in db */ + DefElemAction tableAction; /* What action to perform with the tables */ +} AlterPublicationStmt; + +typedef struct CreateSubscriptionStmt +{ + NodeTag type; + char *subname; /* Name of the subscription */ + char *conninfo; /* Connection string to publisher */ + List *publication; /* One or more publications to subscribe to */ + List *options; /* List of DefElem nodes */ +} CreateSubscriptionStmt; + +typedef struct AlterSubscriptionStmt +{ + NodeTag type; + char *subname; /* Name of the subscription */ + List *options; /* List of DefElem nodes */ +} AlterSubscriptionStmt; + +typedef struct DropSubscriptionStmt +{ + NodeTag type; + char *subname; /* Name of the subscription */ + bool drop_slot; /* Should we drop the slot on remote side? */ + bool missing_ok; /* Skip error if missing? */ +} DropSubscriptionStmt; + #endif /* PARSENODES_H */ diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 9978573e8b..985d6505ec 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -308,6 +308,7 @@ PG_KEYWORD("privileges", PRIVILEGES, UNRESERVED_KEYWORD) PG_KEYWORD("procedural", PROCEDURAL, UNRESERVED_KEYWORD) PG_KEYWORD("procedure", PROCEDURE, UNRESERVED_KEYWORD) PG_KEYWORD("program", PROGRAM, UNRESERVED_KEYWORD) +PG_KEYWORD("publication", PUBLICATION, UNRESERVED_KEYWORD) PG_KEYWORD("quote", QUOTE, UNRESERVED_KEYWORD) PG_KEYWORD("range", RANGE, UNRESERVED_KEYWORD) PG_KEYWORD("read", READ, UNRESERVED_KEYWORD) @@ -360,6 +361,7 @@ PG_KEYWORD("show", SHOW, UNRESERVED_KEYWORD) PG_KEYWORD("similar", SIMILAR, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("simple", SIMPLE, UNRESERVED_KEYWORD) PG_KEYWORD("skip", SKIP, UNRESERVED_KEYWORD) +PG_KEYWORD("slot", SLOT, UNRESERVED_KEYWORD) PG_KEYWORD("smallint", SMALLINT, COL_NAME_KEYWORD) PG_KEYWORD("snapshot", SNAPSHOT, UNRESERVED_KEYWORD) PG_KEYWORD("some", SOME, 
RESERVED_KEYWORD) @@ -374,6 +376,7 @@ PG_KEYWORD("stdout", STDOUT, UNRESERVED_KEYWORD) PG_KEYWORD("storage", STORAGE, UNRESERVED_KEYWORD) PG_KEYWORD("strict", STRICT_P, UNRESERVED_KEYWORD) PG_KEYWORD("strip", STRIP_P, UNRESERVED_KEYWORD) +PG_KEYWORD("subscription", SUBSCRIPTION, UNRESERVED_KEYWORD) PG_KEYWORD("substring", SUBSTRING, COL_NAME_KEYWORD) PG_KEYWORD("symmetric", SYMMETRIC, RESERVED_KEYWORD) PG_KEYWORD("sysid", SYSID, UNRESERVED_KEYWORD) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5b37894cb5..de8225b989 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -745,7 +745,9 @@ typedef enum WAIT_EVENT_SYSLOGGER_MAIN, WAIT_EVENT_WAL_RECEIVER_MAIN, WAIT_EVENT_WAL_SENDER_MAIN, - WAIT_EVENT_WAL_WRITER_MAIN + WAIT_EVENT_WAL_WRITER_MAIN, + WAIT_EVENT_LOGICAL_LAUNCHER_MAIN, + WAIT_EVENT_LOGICAL_APPLY_MAIN } WaitEventActivity; /* ---------- diff --git a/src/include/replication/logicallauncher.h b/src/include/replication/logicallauncher.h new file mode 100644 index 0000000000..715ac7f24c --- /dev/null +++ b/src/include/replication/logicallauncher.h @@ -0,0 +1,27 @@ +/*------------------------------------------------------------------------- + * + * logicallauncher.h + * Exports for logical replication launcher. 
+ * + * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group + * + * src/include/replication/logicallauncher.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICALLAUNCHER_H +#define LOGICALLAUNCHER_H + +extern int max_logical_replication_workers; + +extern void ApplyLauncherRegister(void); +extern void ApplyLauncherMain(Datum main_arg); + +extern Size ApplyLauncherShmemSize(void); +extern void ApplyLauncherShmemInit(void); + +extern void ApplyLauncherWakeup(void); +extern void ApplyLauncherWakeupAtCommit(void); +extern void AtCommit_ApplyLauncher(void); + +#endif /* LOGICALLAUNCHER_H */ diff --git a/src/include/replication/logicalproto.h b/src/include/replication/logicalproto.h new file mode 100644 index 0000000000..0d8153c39d --- /dev/null +++ b/src/include/replication/logicalproto.h @@ -0,0 +1,104 @@ +/*------------------------------------------------------------------------- + * + * logicalproto.h + * logical replication protocol + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/replication/logicalproto.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICAL_PROTO_H +#define LOGICAL_PROTO_H + +#include "replication/reorderbuffer.h" +#include "utils/rel.h" + +/* + * Protocol capabilities + * + * LOGICALREP_PROTO_VERSION_NUM is our native protocol and the greatest version + * we can support. LOGICALREP_PROTO_MIN_VERSION_NUM is the oldest version we + * have backwards compatibility for. The client requests protocol version at + * connect time. + */ +#define LOGICALREP_PROTO_MIN_VERSION_NUM 1 +#define LOGICALREP_PROTO_VERSION_NUM 1 + +/* Tuple coming via logical replication. 
*/ +typedef struct LogicalRepTupleData +{ + char *values[MaxTupleAttributeNumber]; /* value in out function format or NULL if values is NULL */ + bool changed[MaxTupleAttributeNumber]; /* marker for changed/unchanged values */ +} LogicalRepTupleData; + +typedef uint32 LogicalRepRelId; + +/* Relation information */ +typedef struct LogicalRepRelation +{ + /* Info coming from the remote side. */ + LogicalRepRelId remoteid; /* unique id of the relation */ + char *nspname; /* schema name */ + char *relname; /* relation name */ + int natts; /* number of columns */ + char **attnames; /* column names */ + Oid *atttyps; /* column types */ + char replident; /* replica identity */ + Bitmapset *attkeys; /* Bitmap of key columns */ +} LogicalRepRelation; + +/* Type mapping info */ +typedef struct LogicalRepTyp +{ + Oid remoteid; /* unique id of the type */ + char *nspname; /* schema name */ + char *typname; /* name of the type */ + Oid typoid; /* local type Oid */ +} LogicalRepTyp; + +/* Transaction info */ +typedef struct LogicalRepBeginData +{ + XLogRecPtr final_lsn; + TimestampTz committime; + TransactionId xid; +} LogicalRepBeginData; + +typedef struct LogicalRepCommitData +{ + XLogRecPtr commit_lsn; + XLogRecPtr end_lsn; + TimestampTz committime; +} LogicalRepCommitData; + +extern void logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn); +extern void logicalrep_read_begin(StringInfo in, + LogicalRepBeginData *begin_data); +extern void logicalrep_write_commit(StringInfo out, ReorderBufferTXN *txn, + XLogRecPtr commit_lsn); +extern void logicalrep_read_commit(StringInfo in, + LogicalRepCommitData *commit_data); +extern void logicalrep_write_origin(StringInfo out, const char *origin, + XLogRecPtr origin_lsn); +extern char *logicalrep_read_origin(StringInfo in, XLogRecPtr *origin_lsn); +extern void logicalrep_write_insert(StringInfo out, Relation rel, + HeapTuple newtuple); +extern LogicalRepRelId logicalrep_read_insert(StringInfo in, LogicalRepTupleData *newtup); 
+extern void logicalrep_write_update(StringInfo out, Relation rel, HeapTuple oldtuple, + HeapTuple newtuple); +extern LogicalRepRelId logicalrep_read_update(StringInfo in, + bool *has_oldtuple, LogicalRepTupleData *oldtup, + LogicalRepTupleData *newtup); +extern void logicalrep_write_delete(StringInfo out, Relation rel, + HeapTuple oldtuple); +extern LogicalRepRelId logicalrep_read_delete(StringInfo in, + LogicalRepTupleData *oldtup); +extern void logicalrep_write_rel(StringInfo out, Relation rel); +extern LogicalRepRelation *logicalrep_read_rel(StringInfo in); +extern void logicalrep_write_typ(StringInfo out, Oid typoid); +extern void logicalrep_read_typ(StringInfo in, LogicalRepTyp *ltyp); + +#endif /* LOGICAL_PROTO_H */ diff --git a/src/include/replication/logicalrelation.h b/src/include/replication/logicalrelation.h new file mode 100644 index 0000000000..8f9f4a094d --- /dev/null +++ b/src/include/replication/logicalrelation.h @@ -0,0 +1,43 @@ +/*------------------------------------------------------------------------- + * + * logicalrelation.h + * Relation definitions for logical replication relation mapping. + * + * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group + * + * src/include/replication/logicalrelation.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICALRELATION_H +#define LOGICALRELATION_H + +#include "replication/logicalproto.h" + +typedef struct LogicalRepRelMapEntry +{ + LogicalRepRelation remoterel; /* key is remoterel.remoteid */ + + /* Mapping to local relation, filled as needed. */ + Oid localreloid; /* local relation id */ + Relation localrel; /* relcache entry */ + AttrNumber *attrmap; /* map of local attributes to + * remote ones */ + bool updatable; /* Can apply updates/deletes? */ + + /* Sync state. 
*/ + char state; + XLogRecPtr statelsn; +} LogicalRepRelMapEntry; + +extern void logicalrep_relmap_update(LogicalRepRelation *remoterel); + +extern LogicalRepRelMapEntry *logicalrep_rel_open(LogicalRepRelId remoteid, + LOCKMODE lockmode); +extern void logicalrep_rel_close(LogicalRepRelMapEntry *rel, + LOCKMODE lockmode); + +extern void logicalrep_typmap_update(LogicalRepTyp *remotetyp); +extern Oid logicalrep_typmap_getid(Oid remoteid); + +#endif /* LOGICALRELATION_H */ diff --git a/src/include/replication/logicalworker.h b/src/include/replication/logicalworker.h new file mode 100644 index 0000000000..93cb25f438 --- /dev/null +++ b/src/include/replication/logicalworker.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * logicalworker.h + * Exports for logical replication workers. + * + * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group + * + * src/include/replication/logicalworker.h + * + *------------------------------------------------------------------------- + */ +#ifndef LOGICALWORKER_H +#define LOGICALWORKER_H + +extern void ApplyWorkerMain(Datum main_arg); + +#endif /* LOGICALWORKER_H */ diff --git a/src/include/replication/pgoutput.h b/src/include/replication/pgoutput.h new file mode 100644 index 0000000000..c20451d1f2 --- /dev/null +++ b/src/include/replication/pgoutput.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * pgoutput.h + * Logical Replication output plugin + * + * Copyright (c) 2015, PostgreSQL Global Development Group + * + * IDENTIFICATION + * pgoutput.h + * + *------------------------------------------------------------------------- + */ +#ifndef PGOUTPUT_H +#define PGOUTPUT_H + + +typedef struct PGOutputData +{ + MemoryContext context; /* private memory context for transient + * allocations */ + + /* client info */ + uint32 protocol_version; + + List *publication_names; + List *publications; +} PGOutputData; + 
+#endif /* PGOUTPUT_H */ diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h index 6ab2c6f9a5..0857bdc556 100644 --- a/src/include/replication/walreceiver.h +++ b/src/include/replication/walreceiver.h @@ -134,34 +134,64 @@ typedef struct extern WalRcvData *WalRcv; +typedef struct +{ + bool logical; /* True if this is a logical + replication stream, false if + physical stream. */ + char *slotname; /* Name of the replication slot + or NULL. */ + XLogRecPtr startpoint; /* LSN of starting point. */ + + union + { + struct + { + TimeLineID startpointTLI; /* Starting timeline */ + } physical; + struct + { + uint32 proto_version; /* Logical protocol version */ + List *publication_names; /* String list of publications */ + } logical; + } proto; +} WalRcvStreamOptions; + struct WalReceiverConn; typedef struct WalReceiverConn WalReceiverConn; /* libpqwalreceiver hooks */ typedef WalReceiverConn *(*walrcv_connect_fn) (const char *conninfo, bool logical, - const char *appname); + const char *appname, + char **err); +typedef void (*walrcv_check_conninfo_fn) (const char *conninfo); typedef char *(*walrcv_get_conninfo_fn) (WalReceiverConn *conn); typedef char *(*walrcv_identify_system_fn) (WalReceiverConn *conn, - TimeLineID *primary_tli); + TimeLineID *primary_tli, + int *server_version); typedef void (*walrcv_readtimelinehistoryfile_fn) (WalReceiverConn *conn, TimeLineID tli, char **filename, char **content, int *size); typedef bool (*walrcv_startstreaming_fn) (WalReceiverConn *conn, - TimeLineID tli, - XLogRecPtr startpoint, - const char *slotname); + const WalRcvStreamOptions *options); typedef void (*walrcv_endstreaming_fn) (WalReceiverConn *conn, TimeLineID *next_tli); typedef int (*walrcv_receive_fn) (WalReceiverConn *conn, char **buffer, pgsocket *wait_fd); typedef void (*walrcv_send_fn) (WalReceiverConn *conn, const char *buffer, int nbytes); +typedef char *(*walrcv_create_slot_fn) (WalReceiverConn *conn, + const char *slotname, bool 
temporary, + XLogRecPtr *lsn); +typedef bool (*walrcv_command_fn) (WalReceiverConn *conn, const char *cmd, + char **err); typedef void (*walrcv_disconnect_fn) (WalReceiverConn *conn); typedef struct WalReceiverFunctionsType { walrcv_connect_fn walrcv_connect; + walrcv_check_conninfo_fn walrcv_check_conninfo; walrcv_get_conninfo_fn walrcv_get_conninfo; walrcv_identify_system_fn walrcv_identify_system; walrcv_readtimelinehistoryfile_fn walrcv_readtimelinehistoryfile; @@ -169,27 +199,35 @@ typedef struct WalReceiverFunctionsType walrcv_endstreaming_fn walrcv_endstreaming; walrcv_receive_fn walrcv_receive; walrcv_send_fn walrcv_send; + walrcv_create_slot_fn walrcv_create_slot; + walrcv_command_fn walrcv_command; walrcv_disconnect_fn walrcv_disconnect; } WalReceiverFunctionsType; extern PGDLLIMPORT WalReceiverFunctionsType *WalReceiverFunctions; -#define walrcv_connect(conninfo, logical, appname) \ - WalReceiverFunctions->walrcv_connect(conninfo, logical, appname) +#define walrcv_connect(conninfo, logical, appname, err) \ + WalReceiverFunctions->walrcv_connect(conninfo, logical, appname, err) +#define walrcv_check_conninfo(conninfo) \ + WalReceiverFunctions->walrcv_check_conninfo(conninfo) #define walrcv_get_conninfo(conn) \ WalReceiverFunctions->walrcv_get_conninfo(conn) -#define walrcv_identify_system(conn, primary_tli) \ - WalReceiverFunctions->walrcv_identify_system(conn, primary_tli) +#define walrcv_identify_system(conn, primary_tli, server_version) \ + WalReceiverFunctions->walrcv_identify_system(conn, primary_tli, server_version) #define walrcv_readtimelinehistoryfile(conn, tli, filename, content, size) \ WalReceiverFunctions->walrcv_readtimelinehistoryfile(conn, tli, filename, content, size) -#define walrcv_startstreaming(conn, tli, startpoint, slotname) \ - WalReceiverFunctions->walrcv_startstreaming(conn, tli, startpoint, slotname) +#define walrcv_startstreaming(conn, options) \ + WalReceiverFunctions->walrcv_startstreaming(conn, options) #define 
walrcv_endstreaming(conn, next_tli) \ WalReceiverFunctions->walrcv_endstreaming(conn, next_tli) #define walrcv_receive(conn, buffer, wait_fd) \ WalReceiverFunctions->walrcv_receive(conn, buffer, wait_fd) #define walrcv_send(conn, buffer, nbytes) \ WalReceiverFunctions->walrcv_send(conn, buffer, nbytes) +#define walrcv_create_slot(conn, slotname, temporary, lsn) \ + WalReceiverFunctions->walrcv_create_slot(conn, slotname, temporary, lsn) +#define walrcv_command(conn, cmd, err) \ + WalReceiverFunctions->walrcv_command(conn, cmd, err) #define walrcv_disconnect(conn) \ WalReceiverFunctions->walrcv_disconnect(conn) diff --git a/src/include/replication/worker_internal.h b/src/include/replication/worker_internal.h new file mode 100644 index 0000000000..cecd2b8a1c --- /dev/null +++ b/src/include/replication/worker_internal.h @@ -0,0 +1,62 @@ +/*------------------------------------------------------------------------- + * + * worker_internal.h + * Internal headers shared by logical replication workers. + * + * Portions Copyright (c) 2010-2016, PostgreSQL Global Development Group + * + * src/include/replication/worker_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef WORKER_INTERNAL_H +#define WORKER_INTERNAL_H + +#include "catalog/pg_subscription.h" +#include "storage/lock.h" + +typedef struct LogicalRepWorker +{ + /* Pointer to proc array. NULL if not running. */ + PGPROC *proc; + + /* Database id to connect to. */ + Oid dbid; + + /* User to use for connection (will be same as owner of subscription). */ + Oid userid; + + /* Subscription id for the worker. */ + Oid subid; + + /* Used for initial table synchronization. */ + Oid relid; + + /* Stats. */ + XLogRecPtr last_lsn; + TimestampTz last_send_time; + TimestampTz last_recv_time; + XLogRecPtr reply_lsn; + TimestampTz reply_time; +} LogicalRepWorker; + +/* libpqwalreceiver connection */ +extern struct WalReceiverConn *wrconn; + +/* Worker and subscription objects. 
*/ +extern Subscription *MySubscription; +extern LogicalRepWorker *MyLogicalRepWorker; + +extern bool in_remote_transaction; +extern bool got_SIGTERM; + +extern void logicalrep_worker_attach(int slot); +extern LogicalRepWorker *logicalrep_worker_find(Oid subid); +extern int logicalrep_worker_count(Oid subid); +extern void logicalrep_worker_launch(Oid dbid, Oid subid, const char *subname, Oid userid); +extern void logicalrep_worker_stop(Oid subid); +extern void logicalrep_worker_wakeup(Oid subid); + +extern void logicalrep_worker_sigterm(SIGNAL_ARGS); + +#endif /* WORKER_INTERNAL_H */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index b7bab68e21..6a3db9580f 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -23,6 +23,7 @@ * * invalidate a specific tuple in a specific catcache * * invalidate all catcache entries from a given system catalog * * invalidate a relcache entry for a specific logical relation + * * invalidate all relcache entries * * invalidate an smgr cache entry for a specific physical relation * * invalidate the mapped-relation mapping for a given database * * invalidate any saved snapshot that might be used to scan a given relation @@ -78,7 +79,7 @@ typedef struct { int8 id; /* type field --- must be first */ Oid dbId; /* database ID, or 0 if a shared relation */ - Oid relId; /* relation ID */ + Oid relId; /* relation ID, or 0 if whole relcache */ } SharedInvalRelcacheMsg; #define SHAREDINVALSMGR_ID (-3) diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index f397ea1d54..686141b5f9 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -199,6 +199,8 @@ typedef enum AclObjectKind ACL_KIND_FOREIGN_SERVER, /* pg_foreign_server */ ACL_KIND_EVENT_TRIGGER, /* pg_event_trigger */ ACL_KIND_EXTENSION, /* pg_extension */ + ACL_KIND_PUBLICATION, /* pg_publication */ + ACL_KIND_SUBSCRIPTION, /* pg_subscription */ MAX_ACL_KIND /* MUST BE LAST */ } AclObjectKind; @@ -318,6 +320,8 @@ 
extern bool pg_foreign_data_wrapper_ownercheck(Oid srv_oid, Oid roleid); extern bool pg_foreign_server_ownercheck(Oid srv_oid, Oid roleid); extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid); extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid); +extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid); +extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid); extern bool has_createrole_privilege(Oid roleid); extern bool has_bypassrls_privilege(Oid roleid); diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index d036e9f1ae..afbe354b4d 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -43,6 +43,8 @@ extern void CacheInvalidateCatalog(Oid catalogId); extern void CacheInvalidateRelcache(Relation relation); +extern void CacheInvalidateRelcacheAll(void); + extern void CacheInvalidateRelcacheByTuple(HeapTuple classTuple); extern void CacheInvalidateRelcacheByRelid(Oid relid); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index efef1cedfe..a1750accc2 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -18,6 +18,7 @@ #include "access/xlog.h" #include "catalog/pg_class.h" #include "catalog/pg_index.h" +#include "catalog/pg_publication.h" #include "fmgr.h" #include "nodes/bitmapset.h" #include "rewrite/prs2lock.h" @@ -132,13 +133,17 @@ typedef struct RelationData /* data managed by RelationGetIndexList: */ List *rd_indexlist; /* list of OIDs of indexes on relation */ Oid rd_oidindex; /* OID of unique index on OID, if any */ + Oid rd_pkindex; /* OID of primary key, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ + Bitmapset *rd_pkattr; /* cols included in primary key */ Bitmapset *rd_idattr; /* included in replica identity index */ + PublicationActions 
*rd_pubactions; /* publication actions */ + /* * rd_options is set whenever rd_rel is loaded into the relcache entry. * Note that you can NOT look into rd_rel for this data. NULL means "use @@ -627,5 +632,6 @@ get_partition_col_typmod(PartitionKey key, int col) extern void RelationIncrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel); extern bool RelationHasUnloggedIndex(Relation rel); +extern List *RelationGetRepsetList(Relation rel); #endif /* REL_H */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index a2157b1562..da36b6774f 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -40,6 +40,7 @@ extern void RelationClose(Relation relation); extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); +extern Oid RelationGetPrimaryKeyIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); extern List *RelationGetIndexExpressions(Relation relation); extern List *RelationGetIndexPredicate(Relation relation); @@ -48,6 +49,7 @@ typedef enum IndexAttrBitmapKind { INDEX_ATTR_BITMAP_ALL, INDEX_ATTR_BITMAP_KEY, + INDEX_ATTR_BITMAP_PRIMARY_KEY, INDEX_ATTR_BITMAP_IDENTITY_KEY } IndexAttrBitmapKind; @@ -64,6 +66,10 @@ extern void RelationSetIndexList(Relation relation, extern void RelationInitIndexAccessInfo(Relation relation); +/* caller must include pg_publication.h */ +struct PublicationActions; +extern struct PublicationActions *GetRelationPublicationActions(Relation relation); + /* * Routines to support ereport() reports of relation-related errors */ diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index e4bb62b1e4..66f60d271e 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -80,9 +80,15 @@ enum SysCacheIdentifier RELOID, REPLORIGIDENT, REPLORIGNAME, + PUBLICATIONOID, + PUBLICATIONNAME, + PUBLICATIONREL, 
+ PUBLICATIONRELMAP, RULERELNAME, SEQRELID, STATRELATTINH, + SUBSCRIPTIONOID, + SUBSCRIPTIONNAME, TABLESPACEOID, TRFOID, TRFTYPELANG, diff --git a/src/test/Makefile b/src/test/Makefile index 6b40cf50ed..3c2215849e 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,7 @@ subdir = src/test top_builddir = ../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = perl regress isolation modules recovery +SUBDIRS = perl regress isolation modules recovery subscription # We don't build or execute examples/, locale/, or thread/ by default, # but we do want "make clean" etc to recurse into them. Likewise for ssl/, diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 932478183a..18d5d12454 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -380,7 +380,9 @@ WAL archiving can be enabled on this node by passing the keyword parameter has_archiving => 1. This is disabled by default. postgresql.conf can be set up for replication by passing the keyword -parameter allows_streaming => 1. This is disabled by default. +parameter allows_streaming => 'logical' or 'physical' (passing 1 will also +suffice for physical replication) depending on type of replication that +should be enabled. This is disabled by default. The new node is set up in a fast but unsafe configuration where fsync is disabled. 
@@ -415,7 +417,16 @@ sub init if ($params{allows_streaming}) { + if ($params{allows_streaming} eq "logical") + { + print $conf "wal_level = logical\n"; + } + else + { + print $conf "wal_level = replica\n"; + } print $conf "max_wal_senders = 5\n"; + print $conf "max_replication_slots = 5\n"; print $conf "wal_keep_segments = 20\n"; print $conf "max_wal_size = 128MB\n"; print $conf "shared_buffers = 1MB\n"; diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out new file mode 100644 index 0000000000..47b04f1a57 --- /dev/null +++ b/src/test/regress/expected/publication.out @@ -0,0 +1,156 @@ +-- +-- PUBLICATION +-- +CREATE ROLE regress_publication_user LOGIN SUPERUSER; +SET SESSION AUTHORIZATION 'regress_publication_user'; +CREATE PUBLICATION testpub_default; +CREATE PUBLICATION testpib_ins_trunct WITH (nopublish delete, nopublish update); +ALTER PUBLICATION testpub_default WITH (nopublish insert, nopublish delete); +\dRp + List of publications + Name | Owner | Inserts | Updates | Deletes +--------------------+--------------------------+---------+---------+--------- + testpib_ins_trunct | regress_publication_user | t | f | f + testpub_default | regress_publication_user | f | t | f +(2 rows) + +ALTER PUBLICATION testpub_default WITH (publish insert, publish delete); +\dRp + List of publications + Name | Owner | Inserts | Updates | Deletes +--------------------+--------------------------+---------+---------+--------- + testpib_ins_trunct | regress_publication_user | t | f | f + testpub_default | regress_publication_user | t | t | t +(2 rows) + +--- adding tables +CREATE SCHEMA pub_test; +CREATE TABLE testpub_tbl1 (id serial primary key, data text); +CREATE TABLE pub_test.testpub_nopk (foo int, bar int); +CREATE VIEW testpub_view AS SELECT 1; +CREATE PUBLICATION testpub_foralltables FOR ALL TABLES WITH (nopublish delete, nopublish update); +ALTER PUBLICATION testpub_foralltables WITH (publish update); +CREATE TABLE testpub_tbl2 
(id serial primary key, data text); +-- fail - can't add to for all tables publication +ALTER PUBLICATION testpub_foralltables ADD TABLE testpub_tbl2; +ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES +DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications. +-- fail - can't drop from all tables publication +ALTER PUBLICATION testpub_foralltables DROP TABLE testpub_tbl2; +ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES +DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications. +-- fail - can't add to for all tables publication +ALTER PUBLICATION testpub_foralltables SET TABLE pub_test.testpub_nopk; +ERROR: publication "testpub_foralltables" is defined as FOR ALL TABLES +DETAIL: Tables cannot be added to or dropped from FOR ALL TABLES publications. +SELECT pubname, puballtables FROM pg_publication WHERE pubname = 'testpub_foralltables'; + pubname | puballtables +----------------------+-------------- + testpub_foralltables | t +(1 row) + +\d+ testpub_tbl2 + Table "public.testpub_tbl2" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------------+----------+--------------+------------- + id | integer | | not null | nextval('testpub_tbl2_id_seq'::regclass) | plain | | + data | text | | | | extended | | +Indexes: + "testpub_tbl2_pkey" PRIMARY KEY, btree (id) +Publications: + "testpub_foralltables" + +DROP TABLE testpub_tbl2; +DROP PUBLICATION testpub_foralltables; +-- fail - view +CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_view; +ERROR: "testpub_view" is not a table +DETAIL: Only tables can be added to publications. 
+CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1, pub_test.testpub_nopk; +-- fail - already added +ALTER PUBLICATION testpub_fortbl ADD TABLE testpub_tbl1; +ERROR: relation "testpub_tbl1" is already member of publication "testpub_fortbl" +-- fail - already added +CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1; +ERROR: publication "testpub_fortbl" already exists +\dRp+ testpub_fortbl + Publication testpub_fortbl + Inserts | Updates | Deletes +---------+---------+--------- + t | t | t +Tables: + "pub_test.testpub_nopk" + "public.testpub_tbl1" + +-- fail - view +ALTER PUBLICATION testpub_default ADD TABLE testpub_view; +ERROR: "testpub_view" is not a table +DETAIL: Only tables can be added to publications. +ALTER PUBLICATION testpub_default ADD TABLE testpub_tbl1; +ALTER PUBLICATION testpub_default SET TABLE testpub_tbl1; +ALTER PUBLICATION testpub_default ADD TABLE pub_test.testpub_nopk; +ALTER PUBLICATION testpib_ins_trunct ADD TABLE pub_test.testpub_nopk, testpub_tbl1; +\d+ pub_test.testpub_nopk + Table "pub_test.testpub_nopk" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+---------+---------+--------------+------------- + foo | integer | | | | plain | | + bar | integer | | | | plain | | +Publications: + "testpib_ins_trunct" + "testpub_default" + "testpub_fortbl" + +\d+ testpub_tbl1 + Table "public.testpub_tbl1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------------+----------+--------------+------------- + id | integer | | not null | nextval('testpub_tbl1_id_seq'::regclass) | plain | | + data | text | | | | extended | | +Indexes: + "testpub_tbl1_pkey" PRIMARY KEY, btree (id) +Publications: + "testpib_ins_trunct" + "testpub_default" + "testpub_fortbl" + +\dRp+ testpub_default + Publication testpub_default + Inserts | Updates | Deletes 
+---------+---------+--------- + t | t | t +Tables: + "pub_test.testpub_nopk" + "public.testpub_tbl1" + +ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk; +-- fail - nonexistent +ALTER PUBLICATION testpub_default DROP TABLE pub_test.testpub_nopk; +ERROR: relation "testpub_nopk" is not part of the publication +\d+ testpub_tbl1 + Table "public.testpub_tbl1" + Column | Type | Collation | Nullable | Default | Storage | Stats target | Description +--------+---------+-----------+----------+------------------------------------------+----------+--------------+------------- + id | integer | | not null | nextval('testpub_tbl1_id_seq'::regclass) | plain | | + data | text | | | | extended | | +Indexes: + "testpub_tbl1_pkey" PRIMARY KEY, btree (id) +Publications: + "testpib_ins_trunct" + "testpub_fortbl" + +DROP VIEW testpub_view; +DROP TABLE testpub_tbl1; +\dRp+ testpub_default + Publication testpub_default + Inserts | Updates | Deletes +---------+---------+--------- + t | t | t +(1 row) + +DROP PUBLICATION testpub_default; +DROP PUBLICATION testpib_ins_trunct; +DROP SCHEMA pub_test CASCADE; +NOTICE: drop cascades to table pub_test.testpub_nopk +RESET SESSION AUTHORIZATION; +DROP ROLE regress_publication_user; diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index add6adc871..60abcad101 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1417,6 +1417,14 @@ pg_prepared_xacts| SELECT p.transaction, FROM ((pg_prepared_xact() p(transaction, gid, prepared, ownerid, dbid) LEFT JOIN pg_authid u ON ((p.ownerid = u.oid))) LEFT JOIN pg_database d ON ((p.dbid = d.oid))); +pg_publication_tables| SELECT p.pubname, + n.nspname AS schemaname, + c.relname AS tablename + FROM pg_publication p, + (pg_class c + JOIN pg_namespace n ON ((n.oid = c.relnamespace))) + WHERE (c.oid IN ( SELECT pg_get_publication_tables.relid + FROM pg_get_publication_tables((p.pubname)::text) 
pg_get_publication_tables(relid))); pg_replication_origin_status| SELECT pg_show_replication_origin_status.local_id, pg_show_replication_origin_status.external_id, pg_show_replication_origin_status.remote_lsn, @@ -1822,6 +1830,16 @@ pg_stat_ssl| SELECT s.pid, s.sslcompression AS compression, s.sslclientdn AS clientdn FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, ssl, sslversion, sslcipher, sslbits, sslcompression, sslclientdn); +pg_stat_subscription| SELECT su.oid AS subid, + su.subname, + st.pid, + st.received_lsn, + st.last_msg_send_time, + st.last_msg_receipt_time, + st.latest_end_lsn, + st.latest_end_time + FROM (pg_subscription su + LEFT JOIN pg_stat_get_subscription(NULL::oid) st(subid, pid, received_lsn, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time) ON ((st.subid = su.oid))); pg_stat_sys_indexes| SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index 7ad68c745b..0af013f8a2 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -124,6 +124,8 @@ pg_partitioned_table|t pg_pltemplate|t pg_policy|t pg_proc|t +pg_publication|t +pg_publication_rel|t pg_range|t pg_replication_origin|t pg_rewrite|t @@ -133,6 +135,7 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t +pg_subscription|t pg_tablespace|t pg_transform|t pg_trigger|t diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out new file mode 100644 index 0000000000..2ccec98b15 --- /dev/null +++ b/src/test/regress/expected/subscription.out @@ -0,0 +1,66 @@ +-- +-- SUBSCRIPTION +-- +CREATE ROLE regress_subscription_user LOGIN 
SUPERUSER; +SET SESSION AUTHORIZATION 'regress_subscription_user'; +-- fail - no publications +CREATE SUBSCRIPTION testsub CONNECTION 'foo'; +ERROR: syntax error at or near ";" +LINE 1: CREATE SUBSCRIPTION testsub CONNECTION 'foo'; + ^ +-- fail - no connection +CREATE SUBSCRIPTION testsub PUBLICATION foo; +ERROR: syntax error at or near "PUBLICATION" +LINE 1: CREATE SUBSCRIPTION testsub PUBLICATION foo; + ^ +set client_min_messages to error; +CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub; +ERROR: invalid connection string syntax: missing "=" after "testconn" in connection info string + +CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT); +reset client_min_messages; +\dRs+ + List of subscriptions + Name | Owner | Enabled | Publication | Conninfo +---------+---------------------------+---------+-------------+--------------------- + testsub | regress_subscription_user | f | {testpub} | dbname=doesnotexist +(1 row) + +ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3; +\dRs + List of subscriptions + Name | Owner | Enabled | Publication +---------+---------------------------+---------+--------------------- + testsub | regress_subscription_user | f | {testpub2,testpub3} +(1 row) + +ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2'; +ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1; +\dRs+ + List of subscriptions + Name | Owner | Enabled | Publication | Conninfo +---------+---------------------------+---------+--------------------+---------------------- + testsub | regress_subscription_user | f | {testpub,testpub1} | dbname=doesnotexist2 +(1 row) + +BEGIN; +ALTER SUBSCRIPTION testsub ENABLE; +\dRs + List of subscriptions + Name | Owner | Enabled | Publication +---------+---------------------------+---------+-------------------- + testsub | regress_subscription_user | t | {testpub,testpub1} +(1 row) + +ALTER SUBSCRIPTION testsub DISABLE; +\dRs + List of 
subscriptions + Name | Owner | Enabled | Publication +---------+---------------------------+---------+-------------------- + testsub | regress_subscription_user | f | {testpub,testpub1} +(1 row) + +COMMIT; +DROP SUBSCRIPTION testsub NODROP SLOT; +RESET SESSION AUTHORIZATION; +DROP ROLE regress_subscription_user; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 8641769351..e9b2bad6fd 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -97,6 +97,9 @@ test: rules psql_crosstab amutils # run by itself so it can run parallel workers test: select_parallel +# no relation related tests can be put in this group +test: publication subscription + # ---------- # Another group of parallel tests # ---------- diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 835cf3556c..7cdc0f6a69 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -127,6 +127,8 @@ test: tsrf test: rules test: psql_crosstab test: select_parallel +test: publication +test: subscription test: amutils test: select_views test: portals_p2 diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql new file mode 100644 index 0000000000..89a31672fa --- /dev/null +++ b/src/test/regress/sql/publication.sql @@ -0,0 +1,82 @@ +-- +-- PUBLICATION +-- +CREATE ROLE regress_publication_user LOGIN SUPERUSER; +SET SESSION AUTHORIZATION 'regress_publication_user'; + +CREATE PUBLICATION testpub_default; + +CREATE PUBLICATION testpib_ins_trunct WITH (nopublish delete, nopublish update); + +ALTER PUBLICATION testpub_default WITH (nopublish insert, nopublish delete); + +\dRp + +ALTER PUBLICATION testpub_default WITH (publish insert, publish delete); + +\dRp + +--- adding tables +CREATE SCHEMA pub_test; +CREATE TABLE testpub_tbl1 (id serial primary key, data text); +CREATE TABLE pub_test.testpub_nopk (foo int, bar int); +CREATE VIEW testpub_view AS SELECT 
1; + +CREATE PUBLICATION testpub_foralltables FOR ALL TABLES WITH (nopublish delete, nopublish update); +ALTER PUBLICATION testpub_foralltables WITH (publish update); + +CREATE TABLE testpub_tbl2 (id serial primary key, data text); +-- fail - can't add to for all tables publication +ALTER PUBLICATION testpub_foralltables ADD TABLE testpub_tbl2; +-- fail - can't drop from all tables publication +ALTER PUBLICATION testpub_foralltables DROP TABLE testpub_tbl2; +-- fail - can't add to for all tables publication +ALTER PUBLICATION testpub_foralltables SET TABLE pub_test.testpub_nopk; + +SELECT pubname, puballtables FROM pg_publication WHERE pubname = 'testpub_foralltables'; +\d+ testpub_tbl2 + +DROP TABLE testpub_tbl2; +DROP PUBLICATION testpub_foralltables; + +-- fail - view +CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_view; +CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1, pub_test.testpub_nopk; +-- fail - already added +ALTER PUBLICATION testpub_fortbl ADD TABLE testpub_tbl1; +-- fail - already added +CREATE PUBLICATION testpub_fortbl FOR TABLE testpub_tbl1; + +\dRp+ testpub_fortbl + +-- fail - view +ALTER PUBLICATION testpub_default ADD TABLE testpub_view; + +ALTER PUBLICATION testpub_default ADD TABLE testpub_tbl1; +ALTER PUBLICATION testpub_default SET TABLE testpub_tbl1; +ALTER PUBLICATION testpub_default ADD TABLE pub_test.testpub_nopk; + +ALTER PUBLICATION testpib_ins_trunct ADD TABLE pub_test.testpub_nopk, testpub_tbl1; + +\d+ pub_test.testpub_nopk +\d+ testpub_tbl1 +\dRp+ testpub_default + +ALTER PUBLICATION testpub_default DROP TABLE testpub_tbl1, pub_test.testpub_nopk; +-- fail - nonexistent +ALTER PUBLICATION testpub_default DROP TABLE pub_test.testpub_nopk; + +\d+ testpub_tbl1 + +DROP VIEW testpub_view; +DROP TABLE testpub_tbl1; + +\dRp+ testpub_default + +DROP PUBLICATION testpub_default; +DROP PUBLICATION testpib_ins_trunct; + +DROP SCHEMA pub_test CASCADE; + +RESET SESSION AUTHORIZATION; +DROP ROLE regress_publication_user; diff --git 
a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql new file mode 100644 index 0000000000..68c17d5cfd --- /dev/null +++ b/src/test/regress/sql/subscription.sql @@ -0,0 +1,44 @@ +-- +-- SUBSCRIPTION +-- + +CREATE ROLE regress_subscription_user LOGIN SUPERUSER; +SET SESSION AUTHORIZATION 'regress_subscription_user'; + +-- fail - no publications +CREATE SUBSCRIPTION testsub CONNECTION 'foo'; + +-- fail - no connection +CREATE SUBSCRIPTION testsub PUBLICATION foo; + +set client_min_messages to error; +CREATE SUBSCRIPTION testsub CONNECTION 'testconn' PUBLICATION testpub; +CREATE SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist' PUBLICATION testpub WITH (DISABLED, NOCREATE SLOT); +reset client_min_messages; + +\dRs+ + +ALTER SUBSCRIPTION testsub SET PUBLICATION testpub2, testpub3; + +\dRs + +ALTER SUBSCRIPTION testsub CONNECTION 'dbname=doesnotexist2'; +ALTER SUBSCRIPTION testsub SET PUBLICATION testpub, testpub1; + +\dRs+ + +BEGIN; +ALTER SUBSCRIPTION testsub ENABLE; + +\dRs + +ALTER SUBSCRIPTION testsub DISABLE; + +\dRs + +COMMIT; + +DROP SUBSCRIPTION testsub NODROP SLOT; + +RESET SESSION AUTHORIZATION; +DROP ROLE regress_subscription_user; diff --git a/src/test/subscription/.gitignore b/src/test/subscription/.gitignore new file mode 100644 index 0000000000..871e943d50 --- /dev/null +++ b/src/test/subscription/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/subscription/Makefile b/src/test/subscription/Makefile new file mode 100644 index 0000000000..bb9795453a --- /dev/null +++ b/src/test/subscription/Makefile @@ -0,0 +1,22 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/subscription +# +# Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/subscription/Makefile +# 
+#------------------------------------------------------------------------- + +subdir = src/test/subscription +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +EXTRA_INSTALL = contrib/hstore + +check: + $(prove_check) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/subscription/README b/src/test/subscription/README new file mode 100644 index 0000000000..e9e93755b7 --- /dev/null +++ b/src/test/subscription/README @@ -0,0 +1,16 @@ +src/test/subscription/README + +Regression tests for subscription/logical replication +===================================================== + +This directory contains a test suite for subscription/logical replication. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes, for the purpose of the tests. + +NOTE: This requires the --enable-tap-tests argument to configure. diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl new file mode 100644 index 0000000000..b51740bcd4 --- /dev/null +++ b/src/test/subscription/t/001_rep_changes.pl @@ -0,0 +1,188 @@ +# Basic logical replication test +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 11; + +# Initialize publisher node +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_notrep AS SELECT generate_series(1,10) AS a"); +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_ins (a int)"); +$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_full AS SELECT generate_series(1,10) AS a"); 
+$node_publisher->safe_psql('postgres', + "CREATE TABLE tab_rep (a int primary key)"); + +# Setup structure on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_notrep (a int)"); +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_ins (a int)"); +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_full (a int)"); +$node_subscriber->safe_psql('postgres', + "CREATE TABLE tab_rep (a int primary key)"); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub"); +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub_ins_only WITH (nopublish delete, nopublish update)"); +$node_publisher->safe_psql('postgres', + "ALTER PUBLICATION tap_pub ADD TABLE tab_rep, tab_full"); +$node_publisher->safe_psql('postgres', + "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_ins"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub, tap_pub_ins_only"); + +# Wait for subscriber to finish initialization +my $caughtup_query = +"SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';"; +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +my $result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_notrep"); +is($result, qq(0), 'check non-replicated table is empty on subscriber'); + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_ins SELECT generate_series(1,50)"); +$node_publisher->safe_psql('postgres', + "DELETE FROM tab_ins WHERE a > 20"); +$node_publisher->safe_psql('postgres', + "UPDATE tab_ins SET a = -a"); + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_rep SELECT generate_series(1,50)"); 
+$node_publisher->safe_psql('postgres', + "DELETE FROM tab_rep WHERE a > 20"); +$node_publisher->safe_psql('postgres', + "UPDATE tab_rep SET a = -a"); + +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); +is($result, qq(50|1|50), 'check replicated inserts on subscriber'); + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep"); +is($result, qq(20|-20|-1), 'check replicated changes on subscriber'); + +# insert some duplicate rows +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_full SELECT generate_series(1,10)"); + +# add REPLICA IDENTITY FULL so we can update +$node_publisher->safe_psql('postgres', + "ALTER TABLE tab_full REPLICA IDENTITY FULL"); +$node_subscriber->safe_psql('postgres', + "ALTER TABLE tab_full REPLICA IDENTITY FULL"); +$node_publisher->safe_psql('postgres', + "ALTER TABLE tab_ins REPLICA IDENTITY FULL"); +$node_subscriber->safe_psql('postgres', + "ALTER TABLE tab_ins REPLICA IDENTITY FULL"); + +# and do the update +$node_publisher->safe_psql('postgres', + "UPDATE tab_full SET a = a * a"); + +# Wait for subscription to catch up +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full"); +is($result, qq(10|1|100), 'update works with REPLICA IDENTITY FULL and duplicate tuples'); + +# check that change of connection string and/or publication list causes +# restart of subscription workers. Not all of these are registered as tests +# as we need to poll for a change but the test suite will fail nonetheless +# when something goes wrong. 
+my $oldpid = $node_publisher->safe_psql('postgres', + "SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub CONNECTION 'application_name=$appname $publisher_connstr'"); +$node_publisher->poll_query_until('postgres', + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';") + or die "Timed out while waiting for apply to restart"; + +$oldpid = $node_publisher->safe_psql('postgres', + "SELECT pid FROM pg_stat_replication WHERE application_name = '$appname';"); +$node_subscriber->safe_psql('postgres', + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION tap_pub_ins_only"); +$node_publisher->poll_query_until('postgres', + "SELECT pid != $oldpid FROM pg_stat_replication WHERE application_name = '$appname';") + or die "Timed out while waiting for apply to restart"; + +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_ins SELECT generate_series(1001,1100)"); +$node_publisher->safe_psql('postgres', + "DELETE FROM tab_rep"); + +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); +is($result, qq(150|1|1100), 'check replicated inserts after subscription publication change'); + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_rep"); +is($result, qq(20|-20|-1), 'check changes skipped after subscription publication change'); + +# check alter publication (relcache invalidation etc) +$node_publisher->safe_psql('postgres', + "ALTER PUBLICATION tap_pub_ins_only WITH (publish delete)"); +$node_publisher->safe_psql('postgres', + "ALTER PUBLICATION tap_pub_ins_only ADD TABLE tab_full"); +$node_publisher->safe_psql('postgres', + "DELETE FROM tab_ins WHERE a > 0"); +$node_publisher->safe_psql('postgres', + "INSERT INTO tab_full VALUES(0)"); + 
+$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# note that data are different on publisher and subscriber +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_ins"); +is($result, qq(50|1|50), 'check replicated deletes after alter publication'); + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*), min(a), max(a) FROM tab_full"); +is($result, qq(11|0|100), 'check replicated insert after alter publication'); + +# check all the cleanup +$node_subscriber->safe_psql('postgres', "DROP SUBSCRIPTION tap_sub"); + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_subscription"); +is($result, qq(0), 'check subscription was dropped on subscriber'); + +$result = + $node_publisher->safe_psql('postgres', "SELECT count(*) FROM pg_replication_slots"); +is($result, qq(0), 'check replication slot was dropped on publisher'); + +$result = + $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM pg_replication_origin"); +is($result, qq(0), 'check replication origin was dropped on subscriber'); + +$node_subscriber->stop('fast'); +$node_publisher->stop('fast'); diff --git a/src/test/subscription/t/002_types.pl b/src/test/subscription/t/002_types.pl new file mode 100644 index 0000000000..9064eb4c6d --- /dev/null +++ b/src/test/subscription/t/002_types.pl @@ -0,0 +1,539 @@ +# This tests that more complex datatypes are replicated correctly +# by logical replication +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 3; + +# Initialize publisher node +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher 
+my $ddl = qq( + CREATE EXTENSION hstore WITH SCHEMA public; + CREATE TABLE public.tst_one_array ( + a INTEGER PRIMARY KEY, + b INTEGER[] + ); + CREATE TABLE public.tst_arrays ( + a INTEGER[] PRIMARY KEY, + b TEXT[], + c FLOAT[], + d INTERVAL[] + ); + + CREATE TYPE public.tst_enum_t AS ENUM ('a', 'b', 'c', 'd', 'e'); + CREATE TABLE public.tst_one_enum ( + a INTEGER PRIMARY KEY, + b public.tst_enum_t + ); + CREATE TABLE public.tst_enums ( + a public.tst_enum_t PRIMARY KEY, + b public.tst_enum_t[] + ); + + CREATE TYPE public.tst_comp_basic_t AS (a FLOAT, b TEXT, c INTEGER); + CREATE TYPE public.tst_comp_enum_t AS (a FLOAT, b public.tst_enum_t, c INTEGER); + CREATE TYPE public.tst_comp_enum_array_t AS (a FLOAT, b public.tst_enum_t[], c INTEGER); + CREATE TABLE public.tst_one_comp ( + a INTEGER PRIMARY KEY, + b public.tst_comp_basic_t + ); + CREATE TABLE public.tst_comps ( + a public.tst_comp_basic_t PRIMARY KEY, + b public.tst_comp_basic_t[] + ); + CREATE TABLE public.tst_comp_enum ( + a INTEGER PRIMARY KEY, + b public.tst_comp_enum_t + ); + CREATE TABLE public.tst_comp_enum_array ( + a public.tst_comp_enum_t PRIMARY KEY, + b public.tst_comp_enum_t[] + ); + CREATE TABLE public.tst_comp_one_enum_array ( + a INTEGER PRIMARY KEY, + b public.tst_comp_enum_array_t + ); + CREATE TABLE public.tst_comp_enum_what ( + a public.tst_comp_enum_array_t PRIMARY KEY, + b public.tst_comp_enum_array_t[] + ); + + CREATE TYPE public.tst_comp_mix_t AS ( + a public.tst_comp_basic_t, + b public.tst_comp_basic_t[], + c public.tst_enum_t, + d public.tst_enum_t[] + ); + CREATE TABLE public.tst_comp_mix_array ( + a public.tst_comp_mix_t PRIMARY KEY, + b public.tst_comp_mix_t[] + ); + CREATE TABLE public.tst_range ( + a INTEGER PRIMARY KEY, + b int4range + ); + CREATE TABLE public.tst_range_array ( + a INTEGER PRIMARY KEY, + b TSTZRANGE, + c int8range[] + ); + CREATE TABLE public.tst_hstore ( + a INTEGER PRIMARY KEY, + b public.hstore + );); + +# Setup structure on both nodes 
+$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR ALL TABLES"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (SLOT NAME = tap_sub_slot)"); + +# Wait for subscriber to finish initialization +my $caughtup_query = +"SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$appname';"; +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# Insert initial test data +$node_publisher->safe_psql('postgres', qq( + -- test_tbl_one_array_col + INSERT INTO tst_one_array (a, b) VALUES + (1, '{1, 2, 3}'), + (2, '{2, 3, 1}'), + (3, '{3, 2, 1}'), + (4, '{4, 3, 2}'), + (5, '{5, NULL, 3}'); + + -- test_tbl_arrays + INSERT INTO tst_arrays (a, b, c, d) VALUES + ('{1, 2, 3}', '{"a", "b", "c"}', '{1.1, 2.2, 3.3}', '{"1 day", "2 days", "3 days"}'), + ('{2, 3, 1}', '{"b", "c", "a"}', '{2.2, 3.3, 1.1}', '{"2 minutes", "3 minutes", "1 minute"}'), + ('{3, 1, 2}', '{"c", "a", "b"}', '{3.3, 1.1, 2.2}', '{"3 years", "1 year", "2 years"}'), + ('{4, 1, 2}', '{"d", "a", "b"}', '{4.4, 1.1, 2.2}', '{"4 years", "1 year", "2 years"}'), + ('{5, NULL, NULL}', '{"e", NULL, "b"}', '{5.5, 1.1, NULL}', '{"5 years", NULL, NULL}'); + + -- test_tbl_single_enum + INSERT INTO tst_one_enum (a, b) VALUES + (1, 'a'), + (2, 'b'), + (3, 'c'), + (4, 'd'), + (5, NULL); + + -- test_tbl_enums + INSERT INTO tst_enums (a, b) VALUES + ('a', '{b, c}'), + ('b', '{c, a}'), + ('c', '{b, a}'), + ('d', '{c, b}'), + ('e', '{d, NULL}'); + + -- test_tbl_single_composites + INSERT INTO tst_one_comp (a, b) VALUES + (1, ROW(1.0, 'a', 1)), + (2, ROW(2.0, 'b', 
2)), + (3, ROW(3.0, 'c', 3)), + (4, ROW(4.0, 'd', 4)), + (5, ROW(NULL, NULL, 5)); + + -- test_tbl_composites + INSERT INTO tst_comps (a, b) VALUES + (ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_basic_t]), + (ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 2)::tst_comp_basic_t]), + (ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_basic_t]), + (ROW(4.0, 'd', 4), ARRAY[ROW(4, 'd', 3)::tst_comp_basic_t]), + (ROW(5.0, 'e', NULL), ARRAY[NULL, ROW(5, NULL, 5)::tst_comp_basic_t]); + + -- test_tbl_composite_with_enums + INSERT INTO tst_comp_enum (a, b) VALUES + (1, ROW(1.0, 'a', 1)), + (2, ROW(2.0, 'b', 2)), + (3, ROW(3.0, 'c', 3)), + (4, ROW(4.0, 'd', 4)), + (5, ROW(NULL, 'e', NULL)); + + -- test_tbl_composite_with_enums_array + INSERT INTO tst_comp_enum_array (a, b) VALUES + (ROW(1.0, 'a', 1), ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t]), + (ROW(2.0, 'b', 2), ARRAY[ROW(2, 'b', 2)::tst_comp_enum_t]), + (ROW(3.0, 'c', 3), ARRAY[ROW(3, 'c', 3)::tst_comp_enum_t]), + (ROW(4.0, 'd', 3), ARRAY[ROW(3, 'd', 3)::tst_comp_enum_t]), + (ROW(5.0, 'e', 3), ARRAY[ROW(3, 'e', 3)::tst_comp_enum_t, NULL]); + + -- test_tbl_composite_with_single_enums_array_in_composite + INSERT INTO tst_comp_one_enum_array (a, b) VALUES + (1, ROW(1.0, '{a, b, c}', 1)), + (2, ROW(2.0, '{a, b, c}', 2)), + (3, ROW(3.0, '{a, b, c}', 3)), + (4, ROW(4.0, '{c, b, d}', 4)), + (5, ROW(5.0, '{NULL, e, NULL}', 5)); + + -- test_tbl_composite_with_enums_array_in_composite + INSERT INTO tst_comp_enum_what (a, b) VALUES + (ROW(1.0, '{a, b, c}', 1), ARRAY[ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t]), + (ROW(2.0, '{b, c, a}', 2), ARRAY[ROW(2, '{b, c, a}', 1)::tst_comp_enum_array_t]), + (ROW(3.0, '{c, a, b}', 1), ARRAY[ROW(3, '{c, a, b}', 1)::tst_comp_enum_array_t]), + (ROW(4.0, '{c, b, d}', 4), ARRAY[ROW(4, '{c, b, d}', 4)::tst_comp_enum_array_t]), + (ROW(5.0, '{c, NULL, b}', NULL), ARRAY[ROW(5, '{c, e, b}', 1)::tst_comp_enum_array_t]); + + -- test_tbl_mixed_composites + INSERT INTO tst_comp_mix_array (a, b) VALUES + (ROW( + 
ROW(1,'a',1), + ARRAY[ROW(1,'a',1)::tst_comp_basic_t, ROW(2,'b',2)::tst_comp_basic_t], + 'a', + '{a,b,NULL,c}'), + ARRAY[ + ROW( + ROW(1,'a',1), + ARRAY[ + ROW(1,'a',1)::tst_comp_basic_t, + ROW(2,'b',2)::tst_comp_basic_t, + NULL + ], + 'a', + '{a,b,c}' + )::tst_comp_mix_t + ] + ); + + -- test_tbl_range + INSERT INTO tst_range (a, b) VALUES + (1, '[1, 10]'), + (2, '[2, 20]'), + (3, '[3, 30]'), + (4, '[4, 40]'), + (5, '[5, 50]'); + + -- test_tbl_range_array + INSERT INTO tst_range_array (a, b, c) VALUES + (1, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), '{"[1,2]", "[10,20]"}'), + (2, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '2 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[2,3]", "[20,30]"}'), + (3, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '3 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[3,4]"}'), + (4, tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz - interval '4 days', 'Mon Aug 04 00:00:00 2014 CEST'::timestamptz), '{"[4,5]", NULL, "[40,50]"}'), + (5, NULL, NULL); + + -- tst_hstore + INSERT INTO tst_hstore (a, b) VALUES + (1, '"a"=>"1"'), + (2, '"zzz"=>"foo"'), + (3, '"123"=>"321"'), + (4, '"yellow horse"=>"moaned"'); +)); + +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# Check the data on subscriber +my $result = $node_subscriber->safe_psql('postgres', qq( + SET timezone = '+2'; + SELECT a, b FROM tst_one_array ORDER BY a; + SELECT a, b, c, d FROM tst_arrays ORDER BY a; + SELECT a, b FROM tst_one_enum ORDER BY a; + SELECT a, b FROM tst_enums ORDER BY a; + SELECT a, b FROM tst_one_comp ORDER BY a; + SELECT a, b FROM tst_comps ORDER BY a; + SELECT a, b FROM tst_comp_enum ORDER BY a; + SELECT a, b FROM tst_comp_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_one_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_enum_what ORDER BY a; + SELECT a, b FROM tst_comp_mix_array ORDER BY 
a; + SELECT a, b FROM tst_range ORDER BY a; + SELECT a, b, c FROM tst_range_array ORDER BY a; + SELECT a, b FROM tst_hstore ORDER BY a; +)); + +is($result, '1|{1,2,3} +2|{2,3,1} +3|{3,2,1} +4|{4,3,2} +5|{5,NULL,3} +{1,2,3}|{a,b,c}|{1.1,2.2,3.3}|{"1 day","2 days","3 days"} +{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00} +{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"} +{4,1,2}|{d,a,b}|{4.4,1.1,2.2}|{"4 years","1 year","2 years"} +{5,NULL,NULL}|{e,NULL,b}|{5.5,1.1,NULL}|{"5 years",NULL,NULL} +1|a +2|b +3|c +4|d +5| +a|{b,c} +b|{c,a} +c|{b,a} +d|{c,b} +e|{d,NULL} +1|(1,a,1) +2|(2,b,2) +3|(3,c,3) +4|(4,d,4) +5|(,,5) +(1,a,1)|{"(1,a,1)"} +(2,b,2)|{"(2,b,2)"} +(3,c,3)|{"(3,c,3)"} +(4,d,4)|{"(4,d,3)"} +(5,e,)|{NULL,"(5,,5)"} +1|(1,a,1) +2|(2,b,2) +3|(3,c,3) +4|(4,d,4) +5|(,e,) +(1,a,1)|{"(1,a,1)"} +(2,b,2)|{"(2,b,2)"} +(3,c,3)|{"(3,c,3)"} +(4,d,3)|{"(3,d,3)"} +(5,e,3)|{"(3,e,3)",NULL} +1|(1,"{a,b,c}",1) +2|(2,"{a,b,c}",2) +3|(3,"{a,b,c}",3) +4|(4,"{c,b,d}",4) +5|(5,"{NULL,e,NULL}",5) +(1,"{a,b,c}",1)|{"(1,\"{a,b,c}\",1)"} +(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"} +(3,"{c,a,b}",1)|{"(3,\"{c,a,b}\",1)"} +(4,"{c,b,d}",4)|{"(4,\"{c,b,d}\",4)"} +(5,"{c,NULL,b}",)|{"(5,\"{c,e,b}\",1)"} +("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\"(1,a,1)\",\"{\"\"(1,a,1)\"\",\"\"(2,b,2)\"\",NULL}\",a,\"{a,b,c}\")"} +1|[1,11) +2|[2,21) +3|[3,31) +4|[4,41) +5|[5,51) +1|["2014-08-04 00:00:00+02",infinity)|{"[1,3)","[10,21)"} +2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"} +3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"} +4|["2014-07-31 00:00:00+02","2014-08-04 00:00:00+02")|{"[4,6)",NULL,"[40,51)"} +5|| +1|"a"=>"1" +2|"zzz"=>"foo" +3|"123"=>"321" +4|"yellow horse"=>"moaned"', +'check replicated inserts on subscriber'); + +# Run batch of updates +$node_publisher->safe_psql('postgres', qq( + UPDATE tst_one_array SET b = '{4, 5, 6}' WHERE a = 1; + UPDATE tst_one_array SET b = '{4, 5, 6, 1}' WHERE a > 3; + UPDATE 
tst_arrays SET b = '{"1a", "2b", "3c"}', c = '{1.0, 2.0, 3.0}', d = '{"1 day 1 second", "2 days 2 seconds", "3 days 3 second"}' WHERE a = '{1, 2, 3}'; + UPDATE tst_arrays SET b = '{"c", "d", "e"}', c = '{3.0, 4.0, 5.0}', d = '{"3 day 1 second", "4 days 2 seconds", "5 days 3 second"}' WHERE a[1] > 3; + UPDATE tst_one_enum SET b = 'c' WHERE a = 1; + UPDATE tst_one_enum SET b = NULL WHERE a > 3; + UPDATE tst_enums SET b = '{e, NULL}' WHERE a = 'a'; + UPDATE tst_enums SET b = '{e, d}' WHERE a > 'c'; + UPDATE tst_one_comp SET b = ROW(1.0, 'A', 1) WHERE a = 1; + UPDATE tst_one_comp SET b = ROW(NULL, 'x', -1) WHERE a > 3; + UPDATE tst_comps SET b = ARRAY[ROW(9, 'x', -1)::tst_comp_basic_t] WHERE (a).a = 1.0; + UPDATE tst_comps SET b = ARRAY[NULL, ROW(9, 'x', NULL)::tst_comp_basic_t] WHERE (a).a > 3.9; + UPDATE tst_comp_enum SET b = ROW(1.0, NULL, NULL) WHERE a = 1; + UPDATE tst_comp_enum SET b = ROW(4.0, 'd', 44) WHERE a > 3; + UPDATE tst_comp_enum_array SET b = ARRAY[NULL, ROW(3, 'd', 3)::tst_comp_enum_t] WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t; + UPDATE tst_comp_enum_array SET b = ARRAY[ROW(1, 'a', 1)::tst_comp_enum_t, ROW(2, 'b', 2)::tst_comp_enum_t] WHERE (a).a > 3; + UPDATE tst_comp_one_enum_array SET b = ROW(1.0, '{a, e, c}', NULL) WHERE a = 1; + UPDATE tst_comp_one_enum_array SET b = ROW(4.0, '{c, b, d}', 4) WHERE a > 3; + UPDATE tst_comp_enum_what SET b = ARRAY[NULL, ROW(1, '{a, b, c}', 1)::tst_comp_enum_array_t, ROW(NULL, '{a, e, c}', 2)::tst_comp_enum_array_t] WHERE (a).a = 1; + UPDATE tst_comp_enum_what SET b = ARRAY[ROW(5, '{a, b, c}', 5)::tst_comp_enum_array_t] WHERE (a).a > 3; + UPDATE tst_comp_mix_array SET b[2] = NULL WHERE ((a).a).a = 1; + UPDATE tst_range SET b = '[100, 1000]' WHERE a = 1; + UPDATE tst_range SET b = '(1, 90)' WHERE a > 3; + UPDATE tst_range_array SET c = '{"[100, 1000]"}' WHERE a = 1; + UPDATE tst_range_array SET b = tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'infinity'), c = '{NULL, "[11,9999999]"}' WHERE a > 3; + UPDATE 
tst_hstore SET b = '"updated"=>"value"' WHERE a < 3; + UPDATE tst_hstore SET b = '"also"=>"updated"' WHERE a = 3; +)); + +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# Check the data on subscriber +$result = $node_subscriber->safe_psql('postgres', qq( + SET timezone = '+2'; + SELECT a, b FROM tst_one_array ORDER BY a; + SELECT a, b, c, d FROM tst_arrays ORDER BY a; + SELECT a, b FROM tst_one_enum ORDER BY a; + SELECT a, b FROM tst_enums ORDER BY a; + SELECT a, b FROM tst_one_comp ORDER BY a; + SELECT a, b FROM tst_comps ORDER BY a; + SELECT a, b FROM tst_comp_enum ORDER BY a; + SELECT a, b FROM tst_comp_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_one_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_enum_what ORDER BY a; + SELECT a, b FROM tst_comp_mix_array ORDER BY a; + SELECT a, b FROM tst_range ORDER BY a; + SELECT a, b, c FROM tst_range_array ORDER BY a; + SELECT a, b FROM tst_hstore ORDER BY a; +)); + +is($result, '1|{4,5,6} +2|{2,3,1} +3|{3,2,1} +4|{4,5,6,1} +5|{4,5,6,1} +{1,2,3}|{1a,2b,3c}|{1,2,3}|{"1 day 00:00:01","2 days 00:00:02","3 days 00:00:03"} +{2,3,1}|{b,c,a}|{2.2,3.3,1.1}|{00:02:00,00:03:00,00:01:00} +{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"} +{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"} +{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"} +1|c +2|b +3|c +4| +5| +a|{e,NULL} +b|{c,a} +c|{b,a} +d|{e,d} +e|{e,d} +1|(1,A,1) +2|(2,b,2) +3|(3,c,3) +4|(,x,-1) +5|(,x,-1) +(1,a,1)|{"(9,x,-1)"} +(2,b,2)|{"(2,b,2)"} +(3,c,3)|{"(3,c,3)"} +(4,d,4)|{NULL,"(9,x,)"} +(5,e,)|{NULL,"(9,x,)"} +1|(1,,) +2|(2,b,2) +3|(3,c,3) +4|(4,d,44) +5|(4,d,44) +(1,a,1)|{NULL,"(3,d,3)"} +(2,b,2)|{"(2,b,2)"} +(3,c,3)|{"(3,c,3)"} +(4,d,3)|{"(1,a,1)","(2,b,2)"} +(5,e,3)|{"(1,a,1)","(2,b,2)"} +1|(1,"{a,e,c}",) +2|(2,"{a,b,c}",2) +3|(3,"{a,b,c}",3) +4|(4,"{c,b,d}",4) +5|(4,"{c,b,d}",4) 
+(1,"{a,b,c}",1)|{NULL,"(1,\"{a,b,c}\",1)","(,\"{a,e,c}\",2)"} +(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"} +(3,"{c,a,b}",1)|{"(3,\"{c,a,b}\",1)"} +(4,"{c,b,d}",4)|{"(5,\"{a,b,c}\",5)"} +(5,"{c,NULL,b}",)|{"(5,\"{a,b,c}\",5)"} +("(1,a,1)","{""(1,a,1)"",""(2,b,2)""}",a,"{a,b,NULL,c}")|{"(\"(1,a,1)\",\"{\"\"(1,a,1)\"\",\"\"(2,b,2)\"\",NULL}\",a,\"{a,b,c}\")",NULL} +1|[100,1001) +2|[2,21) +3|[3,31) +4|[2,90) +5|[2,90) +1|["2014-08-04 00:00:00+02",infinity)|{"[100,1001)"} +2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"} +3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"} +4|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"} +5|["2014-08-04 00:00:00+02",infinity)|{NULL,"[11,10000000)"} +1|"updated"=>"value" +2|"updated"=>"value" +3|"also"=>"updated" +4|"yellow horse"=>"moaned"', +'check replicated updates on subscriber'); + +# Run batch of deletes +$node_publisher->safe_psql('postgres', qq( + DELETE FROM tst_one_array WHERE a = 1; + DELETE FROM tst_one_array WHERE b = '{2, 3, 1}'; + DELETE FROM tst_arrays WHERE a = '{1, 2, 3}'; + DELETE FROM tst_arrays WHERE a[1] = 2; + DELETE FROM tst_one_enum WHERE a = 1; + DELETE FROM tst_one_enum WHERE b = 'b'; + DELETE FROM tst_enums WHERE a = 'a'; + DELETE FROM tst_enums WHERE b[1] = 'b'; + DELETE FROM tst_one_comp WHERE a = 1; + DELETE FROM tst_one_comp WHERE (b).a = 2.0; + DELETE FROM tst_comps WHERE (a).b = 'a'; + DELETE FROM tst_comps WHERE ROW(3, 'c', 3)::tst_comp_basic_t = ANY(b); + DELETE FROM tst_comp_enum WHERE a = 1; + DELETE FROM tst_comp_enum WHERE (b).a = 2.0; + DELETE FROM tst_comp_enum_array WHERE a = ROW(1.0, 'a', 1)::tst_comp_enum_t; + DELETE FROM tst_comp_enum_array WHERE ROW(3, 'c', 3)::tst_comp_enum_t = ANY(b); + DELETE FROM tst_comp_one_enum_array WHERE a = 1; + DELETE FROM tst_comp_one_enum_array WHERE 'a' = ANY((b).b); + DELETE FROM tst_comp_enum_what WHERE (a).a = 1; + DELETE FROM tst_comp_enum_what WHERE (b[1]).b = '{c, a, b}'; + DELETE FROM tst_comp_mix_array WHERE 
((a).a).a = 1; + DELETE FROM tst_range WHERE a = 1; + DELETE FROM tst_range WHERE '[10,20]' && b; + DELETE FROM tst_range_array WHERE a = 1; + DELETE FROM tst_range_array WHERE tstzrange('Mon Aug 04 00:00:00 2014 CEST'::timestamptz, 'Mon Aug 05 00:00:00 2014 CEST'::timestamptz) && b; + DELETE FROM tst_hstore WHERE a = 1; +)); + +$node_publisher->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for subscriber to catch up"; + +# Check the data on subscriber +$result = $node_subscriber->safe_psql('postgres', qq( + SET timezone = '+2'; + SELECT a, b FROM tst_one_array ORDER BY a; + SELECT a, b, c, d FROM tst_arrays ORDER BY a; + SELECT a, b FROM tst_one_enum ORDER BY a; + SELECT a, b FROM tst_enums ORDER BY a; + SELECT a, b FROM tst_one_comp ORDER BY a; + SELECT a, b FROM tst_comps ORDER BY a; + SELECT a, b FROM tst_comp_enum ORDER BY a; + SELECT a, b FROM tst_comp_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_one_enum_array ORDER BY a; + SELECT a, b FROM tst_comp_enum_what ORDER BY a; + SELECT a, b FROM tst_comp_mix_array ORDER BY a; + SELECT a, b FROM tst_range ORDER BY a; + SELECT a, b, c FROM tst_range_array ORDER BY a; + SELECT a, b FROM tst_hstore ORDER BY a; +)); + +is($result, '3|{3,2,1} +4|{4,5,6,1} +5|{4,5,6,1} +{3,1,2}|{c,a,b}|{3.3,1.1,2.2}|{"3 years","1 year","2 years"} +{4,1,2}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"} +{5,NULL,NULL}|{c,d,e}|{3,4,5}|{"3 days 00:00:01","4 days 00:00:02","5 days 00:00:03"} +3|c +4| +5| +b|{c,a} +d|{e,d} +e|{e,d} +3|(3,c,3) +4|(,x,-1) +5|(,x,-1) +(2,b,2)|{"(2,b,2)"} +(4,d,4)|{NULL,"(9,x,)"} +(5,e,)|{NULL,"(9,x,)"} +3|(3,c,3) +4|(4,d,44) +5|(4,d,44) +(2,b,2)|{"(2,b,2)"} +(4,d,3)|{"(1,a,1)","(2,b,2)"} +(5,e,3)|{"(1,a,1)","(2,b,2)"} +4|(4,"{c,b,d}",4) +5|(4,"{c,b,d}",4) +(2,"{b,c,a}",2)|{"(2,\"{b,c,a}\",1)"} +(4,"{c,b,d}",4)|{"(5,\"{a,b,c}\",5)"} +(5,"{c,NULL,b}",)|{"(5,\"{a,b,c}\",5)"} +2|["2014-08-02 00:00:00+02","2014-08-04 00:00:00+02")|{"[2,4)","[20,31)"} 
+3|["2014-08-01 00:00:00+02","2014-08-04 00:00:00+02")|{"[3,5)"} +2|"updated"=>"value" +3|"also"=>"updated" +4|"yellow horse"=>"moaned"', +'check replicated deletes on subscriber'); + +$node_subscriber->stop('fast'); +$node_publisher->stop('fast');