postgresql/src/include/replication/walsender_private.h

/*-------------------------------------------------------------------------
 *
 * walsender_private.h
 *	  Private definitions from replication/walsender.c.
 *
 * Portions Copyright (c) 2010-2018, PostgreSQL Global Development Group
 *
 * src/include/replication/walsender_private.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef _WALSENDER_PRIVATE_H
#define _WALSENDER_PRIVATE_H

#include "access/xlog.h"
#include "nodes/nodes.h"
#include "replication/syncrep.h"
#include "storage/latch.h"
#include "storage/shmem.h"
#include "storage/spin.h"

/*
 * State of a walsender, as stored in WalSnd.state.
 *
 * The numeric values are written out explicitly; they are the same values
 * that declaration order starting from 0 would assign.
 */
typedef enum WalSndState
{
	WALSNDSTATE_STARTUP = 0,
	WALSNDSTATE_BACKUP = 1,
	WALSNDSTATE_CATCHUP = 2,
	WALSNDSTATE_STREAMING = 3,
	WALSNDSTATE_STOPPING = 4
} WalSndState;

/*
 * Each walsender has a WalSnd struct in shared memory.
 *
 * Locking: this struct is protected by 'mutex', with two exceptions:
 *
 * 1. sync_standby_priority is protected by SyncRepLock instead, as noted at
 *    its declaration below.
 *
 * 2. Some members are only ever written by the walsender process itself, so
 *    that process is free to read those members without holding the
 *    spinlock.  Accesses from any other process still need the spinlock.
 *
 * pid and needreload always require the spinlock to be held for all
 * accesses.
 */
typedef struct WalSnd
{
	pid_t		pid;			/* this walsender's PID, or 0 if not active */

	WalSndState state;			/* this walsender's state */
	XLogRecPtr	sentPtr;		/* WAL has been sent up to this point */
	bool		needreload;		/* does currently-open file need to be
								 * reloaded? */

	/*
	 * The xlog locations that have been written, flushed, and applied by
	 * standby-side. These may be invalid if the standby-side has not offered
	 * values yet.
	 */
	XLogRecPtr	write;
	XLogRecPtr	flush;
	XLogRecPtr	apply;

	/*
	 * Measured lag times for the write, flush and apply positions
	 * respectively, or -1 for unknown/none.
	 */
	TimeOffset	writeLag;
	TimeOffset	flushLag;
	TimeOffset	applyLag;

	/*
	 * Protects shared variables shown above.
	 *
	 * NOTE(review): latch and replyTime are declared below this point, yet
	 * the locking rules at the top of the struct name sync_standby_priority
	 * as the only member with a different lock.  Presumably replyTime is
	 * also accessed under this mutex -- confirm against walsender.c.
	 */
	slock_t		mutex;

	/*
	 * Pointer to the walsender's latch. Used by backends to wake up this
	 * walsender when it has work to do. NULL if the walsender isn't active.
	 */
	Latch	   *latch;

	/*
	 * The priority order of the standby managed by this walsender, as listed
	 * in synchronous_standby_names, or 0 if not listed. Protected by
	 * SyncRepLock (not by 'mutex').
	 */
	int			sync_standby_priority;

	/*
	 * Timestamp of the last message received from standby.
	 */
	TimestampTz replyTime;
} WalSnd;

/*
 * Declared here, defined elsewhere.  Presumably points at the calling
 * walsender's own WalSnd slot -- confirm in walsender.c.
 */
extern WalSnd *MyWalSnd;

/*
 * There is one WalSndCtl struct for the whole database cluster.
 *
 * It holds the synchronous replication wait queues and their bookkeeping,
 * followed by the array of per-walsender WalSnd structs.
 */
typedef struct
{
	/*
	 * Synchronous replication queue with one queue per request type.
	 * Protected by SyncRepLock.
	 */
	SHM_QUEUE	SyncRepQueue[NUM_SYNC_REP_WAIT_MODE];

	/*
	 * Current location of the head of the queue. All waiters should have a
	 * waitLSN that follows this value. Protected by SyncRepLock.
	 *
	 * There is one entry per wait mode, the same count as SyncRepQueue.
	 */
	XLogRecPtr	lsn[NUM_SYNC_REP_WAIT_MODE];

	/*
	 * Are any sync standbys defined?  Waiting backends can't reload the
	 * config file safely, so checkpointer updates this value as needed.
	 * Protected by SyncRepLock.
	 */
	bool		sync_standbys_defined;

	/*
	 * Per-walsender structs.  This is a flexible array member, so it must
	 * stay the last field, and sizeof(WalSndCtlData) must not be used to
	 * size the allocation; use offsetof(WalSndCtlData, walsnds) plus the
	 * space for the array instead.  The element count is not visible in this
	 * header (presumably max_wal_senders -- confirm in walsender.c).
	 */
	WalSnd		walsnds[FLEXIBLE_ARRAY_MEMBER];
} WalSndCtlData;

/* Declared here, defined elsewhere; pointer to the WalSndCtlData struct. */
extern WalSndCtlData *WalSndCtl;


/*
 * Takes the new WalSndState value.  Presumably updates the calling
 * walsender's state in shared memory -- confirm in walsender.c.
 */
extern void WalSndSetState(WalSndState state);

/*
 * Internal functions for parsing the replication grammar, in repl_gram.y and
 * repl_scanner.l
 *
 * replication_yyerror is annotated with pg_attribute_noreturn(), i.e. it is
 * declared as never returning to its caller.
 */
extern int	replication_yyparse(void);
extern int	replication_yylex(void);
extern void replication_yyerror(const char *str) pg_attribute_noreturn();
extern void replication_scanner_init(const char *query_string);
extern void replication_scanner_finish(void);

/*
 * Declared here, defined elsewhere.  Presumably holds the parse tree produced
 * by replication_yyparse() -- confirm in repl_gram.y.
 */
extern Node *replication_parse_result;

#endif							/* _WALSENDER_PRIVATE_H */
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`/*-------------------------------------------------------------------------`
			`*`
			`* walsender_private.h`
			`* Private definitions from replication/walsender.c.`
			`*`
Update copyright for 2018 Backpatch-through: certain files through 9.3 2018-01-03 05:30:12 +01:00			`* Portions Copyright (c) 2010-2018, PostgreSQL Global Development Group`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`*`
			`* src/include/replication/walsender_private.h`
			`*`
			`*-------------------------------------------------------------------------`
			`*/`
			`#ifndef _WALSENDER_PRIVATE_H`
			`#define _WALSENDER_PRIVATE_H`

			`#include "access/xlog.h"`
			`#include "nodes/nodes.h"`
Add new replication mode synchronous_commit = 'write'. Replication occurs only to memory on standby, not to disk, so provides additional performance if user wishes to reduce durability level slightly. Adds concept of multiple independent sync rep queues. Fujii Masao and Simon Riggs 2012-01-24 21:22:37 +01:00			`#include "replication/syncrep.h"`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`#include "storage/latch.h"`
			`#include "storage/shmem.h"`
			`#include "storage/spin.h"`

			`typedef enum WalSndState`
			`{`
			`WALSNDSTATE_STARTUP = 0,`
			`WALSNDSTATE_BACKUP,`
			`WALSNDSTATE_CATCHUP,`
Prevent possibility of panics during shutdown checkpoint. When the checkpointer writes the shutdown checkpoint, it checks afterwards whether any WAL has been written since it started and throws a PANIC if so. At that point, only walsenders are still active, so one might think this could not happen, but walsenders can also generate WAL, for instance in BASE_BACKUP and logical decoding related commands (e.g. via hint bits). So they can trigger this panic if such a command is run while the shutdown checkpoint is being written. To fix this, divide the walsender shutdown into two phases. First, checkpointer, itself triggered by postmaster, sends a PROCSIG_WALSND_INIT_STOPPING signal to all walsenders. If the backend is idle or runs an SQL query this causes the backend to shutdown, if logical replication is in progress all existing WAL records are processed followed by a shutdown. Otherwise this causes the walsender to switch to the "stopping" state. In this state, the walsender will reject any further replication commands. The checkpointer begins the shutdown checkpoint once all walsenders are confirmed as stopping. When the shutdown checkpoint finishes, the postmaster sends us SIGUSR2. This instructs walsender to send any outstanding WAL, including the shutdown checkpoint record, wait for it to be replicated to the standby, and then exit. Author: Andres Freund, based on an earlier patch by Michael Paquier Reported-By: Fujii Masao, Andres Freund Reviewed-By: Michael Paquier Discussion: https://postgr.es/m/20170602002912.tqlwn4gymzlxpvs2@alap3.anarazel.de Backpatch: 9.4, where logical decoding was introduced 2017-06-06 03:53:41 +02:00			`WALSNDSTATE_STREAMING,`
			`WALSNDSTATE_STOPPING`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`} WalSndState;`

			`/*`
			`* Each walsender has a WalSnd struct in shared memory.`
Fix locking in WAL receiver/sender shmem state structs In WAL receiver and WAL server, some accesses to their corresponding shared memory control structs were done without holding any kind of lock, which could lead to inconsistent and possibly insecure results. In walsender, fix by clarifying the locking rules and following them correctly, as documented in the new comment in walsender_private.h; namely that some members can be read in walsender itself without a lock, because the only writes occur in the same process. The rest of the struct requires spinlock for accesses, as usual. In walreceiver, fix by always holding spinlock while accessing the struct. While there is potentially a problem in all branches, it is minor in stable ones. This only became a real problem in pg10 because of quorum commit in synchronous replication (commit 3901fd70cc7c), and a potential security problem in walreceiver because a superuser() check was removed by default monitoring roles (commit 25fff40798fc). Thus, no backpatch. In passing, clean up some leftover braces which were used to create unconditional blocks. Once upon a time these were used for volatile-izing accesses to those shmem structs, which is no longer required. Many other occurrences of this pattern remain. Author: Michaël Paquier Reported-by: Michaël Paquier Reviewed-by: Masahiko Sawada, Kyotaro Horiguchi, Thomas Munro, Robert Haas Discussion: https://postgr.es/m/CAB7nPqTWYqtzD=LN_oDaf9r-hAjUEPAy0B9yRkhcsLdRN8fzrw@mail.gmail.com 2017-07-01 00:06:33 +02:00			`*`
			`* This struct is protected by 'mutex', with two exceptions: one is`
			`* sync_standby_priority as noted below. The other exception is that some`
			`* members are only written by the walsender process itself, and thus that`
			`* process is free to read those members without holding spinlock. pid and`
			`* needreload always require the spinlock to be held for all accesses.`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`*/`
			`typedef struct WalSnd`
			`{`
Fix locking in WAL receiver/sender shmem state structs In WAL receiver and WAL server, some accesses to their corresponding shared memory control structs were done without holding any kind of lock, which could lead to inconsistent and possibly insecure results. In walsender, fix by clarifying the locking rules and following them correctly, as documented in the new comment in walsender_private.h; namely that some members can be read in walsender itself without a lock, because the only writes occur in the same process. The rest of the struct requires spinlock for accesses, as usual. In walreceiver, fix by always holding spinlock while accessing the struct. While there is potentially a problem in all branches, it is minor in stable ones. This only became a real problem in pg10 because of quorum commit in synchronous replication (commit 3901fd70cc7c), and a potential security problem in walreceiver because a superuser() check was removed by default monitoring roles (commit 25fff40798fc). Thus, no backpatch. In passing, clean up some leftover braces which were used to create unconditional blocks. Once upon a time these were used for volatile-izing accesses to those shmem structs, which is no longer required. Many other occurrences of this pattern remain. Author: Michaël Paquier Reported-by: Michaël Paquier Reviewed-by: Masahiko Sawada, Kyotaro Horiguchi, Thomas Munro, Robert Haas Discussion: https://postgr.es/m/CAB7nPqTWYqtzD=LN_oDaf9r-hAjUEPAy0B9yRkhcsLdRN8fzrw@mail.gmail.com 2017-07-01 00:06:33 +02:00			`pid_t pid; /* this walsender's PID, or 0 if not active */`

Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`WalSndState state; /* this walsender's state */`
			`XLogRecPtr sentPtr; /* WAL has been sent up to this point */`
Run pgindent on 9.2 source tree in preparation for first 9.3 commit-fest. 2012-06-10 21:20:04 +02:00			`bool needreload; /* does currently-open file need to be`
			`* reloaded? */`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00
			`/*`
			`* The xlog locations that have been written, flushed, and applied by`
			`* standby-side. These may be invalid if the standby-side has not offered`
			`* values yet.`
			`*/`
			`XLogRecPtr write;`
			`XLogRecPtr flush;`
			`XLogRecPtr apply;`

Replication lag tracking for walsenders Adds write_lag, flush_lag and replay_lag cols to pg_stat_replication. Implements a lag tracker module that reports the lag times based upon measurements of the time taken for recent WAL to be written, flushed and replayed and for the sender to hear about it. These times represent the commit lag that was (or would have been) introduced by each synchronous commit level, if the remote server was configured as a synchronous standby. For an asynchronous standby, the replay_lag column approximates the delay before recent transactions became visible to queries. If the standby server has entirely caught up with the sending server and there is no more WAL activity, the most recently measured lag times will continue to be displayed for a short time and then show NULL. Physical replication lag tracking is automatic. Logical replication tracking is possible but is the responsibility of the logical decoding plugin. Tracking is a private module operating within each walsender individually, with values reported to shared memory. Module not used outside of walsender. Design and code is good enough now to commit - kudos to the author. In many ways a difficult topic, with important and subtle behaviour so this should be expected to generate discussion and multiple open items: Test now! Author: Thomas Munro, following designs by Fujii Masao and Simon Riggs Review: Simon Riggs, Ian Barwick and Craig Ringer 2017-03-23 15:05:28 +01:00			`/* Measured lag times, or -1 for unknown/none. */`
			`TimeOffset writeLag;`
			`TimeOffset flushLag;`
			`TimeOffset applyLag;`

Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`/* Protects shared variables shown above. */`
			`slock_t mutex;`

			`/*`
Replace walsender's latch with the general shared latch. Relying on the normal shared latch simplifies interrupt/signal handling because we can rely on all signal handlers setting the proc latch. That in turn allows us to avoid the use of ImmediateInterruptOK, which arguably isn't correct because WaitLatchOrSocket isn't declared to be immediately interruptible. Also change sections that wait on the walsender's latch to notice interrupts quicker/more reliably and make them more consistent with each other. This is part of a larger "get rid of ImmediateInterruptOK" series. Discussion: 20150115020335.GZ5245@awork2.anarazel.de 2015-01-17 13:00:42 +01:00			`* Pointer to the walsender's latch. Used by backends to wake up this`
			`* walsender when it has work to do. NULL if the walsender isn't active.`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`*/`
Use FLEXIBLE_ARRAY_MEMBER in a bunch more places. Replace some bogus "x[1]" declarations with "x[FLEXIBLE_ARRAY_MEMBER]". Aside from being more self-documenting, this should help prevent bogus warnings from static code analyzers and perhaps compiler misoptimizations. This patch is just a down payment on eliminating the whole problem, but it gets rid of a lot of easy-to-fix cases. Note that the main problem with doing this is that one must no longer rely on computing sizeof(the containing struct), since the result would be compiler-dependent. Instead use offsetof(struct, lastfield). Autoconf also warns against spelling that offsetof(struct, lastfield[0]). Michael Paquier, review and additional fixes by me. 2015-02-20 06:11:42 +01:00			`Latch *latch;`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00
			`/*`
			`* The priority order of the standby managed by this WALSender, as listed`
			`* in synchronous_standby_names, or 0 if not-listed. Protected by`
			`* SyncRepLock.`
			`*/`
			`int sync_standby_priority;`
Add timestamp of last received message from standby to pg_stat_replication The timestamp generated by the standby at message transmission has been included in the protocol since its introduction for both the status update message and hot standby feedback message, but it has never appeared in pg_stat_replication. Seeing this timestamp does not matter much with a cluster which has a lot of activity, but on a mostly-idle cluster, this makes monitoring able to react faster than the configured timeouts. Author: MyungKyu LIM Reviewed-by: Michael Paquier, Masahiko Sawada Discussion: https://postgr.es/m/1657809367.407321.1533027417725.JavaMail.jboss@ep2ml404 2018-12-09 08:35:06 +01:00
			`/*`
			`* Timestamp of the last message received from standby.`
			`*/`
			`TimestampTz replyTime;`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`} WalSnd;`

			`extern WalSnd *MyWalSnd;`

			`/* There is one WalSndCtl struct for the whole database cluster */`
			`typedef struct`
			`{`
			`/*`
Add new replication mode synchronous_commit = 'write'. Replication occurs only to memory on standby, not to disk, so provides additional performance if user wishes to reduce durability level slightly. Adds concept of multiple independent sync rep queues. Fujii Masao and Simon Riggs 2012-01-24 21:22:37 +01:00			`* Synchronous replication queue with one queue per request type.`
			`* Protected by SyncRepLock.`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`*/`
Add new replication mode synchronous_commit = 'write'. Replication occurs only to memory on standby, not to disk, so provides additional performance if user wishes to reduce durability level slightly. Adds concept of multiple independent sync rep queues. Fujii Masao and Simon Riggs 2012-01-24 21:22:37 +01:00			`SHM_QUEUE SyncRepQueue[NUM_SYNC_REP_WAIT_MODE];`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00
			`/*`
			`* Current location of the head of the queue. All waiters should have a`
			`* waitLSN that follows this value. Protected by SyncRepLock.`
			`*/`
Add new replication mode synchronous_commit = 'write'. Replication occurs only to memory on standby, not to disk, so provides additional performance if user wishes to reduce durability level slightly. Adds concept of multiple independent sync rep queues. Fujii Masao and Simon Riggs 2012-01-24 21:22:37 +01:00			`XLogRecPtr lsn[NUM_SYNC_REP_WAIT_MODE];`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00
			`/*`
			`* Are any sync standbys defined? Waiting backends can't reload the`
Various minor comments changes from bgwriter to checkpointer. 2012-01-30 15:34:25 +01:00			`* config file safely, so checkpointer updates this value as needed.`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`* Protected by SyncRepLock.`
			`*/`
			`bool sync_standbys_defined;`

Use FLEXIBLE_ARRAY_MEMBER in a bunch more places. Replace some bogus "x[1]" declarations with "x[FLEXIBLE_ARRAY_MEMBER]". Aside from being more self-documenting, this should help prevent bogus warnings from static code analyzers and perhaps compiler misoptimizations. This patch is just a down payment on eliminating the whole problem, but it gets rid of a lot of easy-to-fix cases. Note that the main problem with doing this is that one must no longer rely on computing sizeof(the containing struct), since the result would be compiler-dependent. Instead use offsetof(struct, lastfield). Autoconf also warns against spelling that offsetof(struct, lastfield[0]). Michael Paquier, review and additional fixes by me. 2015-02-20 06:11:42 +01:00			`WalSnd walsnds[FLEXIBLE_ARRAY_MEMBER];`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`} WalSndCtlData;`

			`extern WalSndCtlData *WalSndCtl;`


			`extern void WalSndSetState(WalSndState state);`

			`/*`
			`* Internal functions for parsing the replication grammar, in repl_gram.y and`
			`* repl_scanner.l`
			`*/`
			`extern int replication_yyparse(void);`
			`extern int replication_yylex(void);`
Tweak __attribute__-wrapping macros for better pgindent results. This improves on commit bbfd7edae5aa5ad5553d3c7e102f2e450d4380d4 by making two simple changes: * pg_attribute_noreturn now takes parentheses, ie pg_attribute_noreturn(). Likewise pg_attribute_unused(), pg_attribute_packed(). This reduces pgindent's tendency to misformat declarations involving them. * attributes are now always attached to function declarations, not definitions. Previously some places were taking creative shortcuts, which were not merely candidates for bad misformatting by pgindent but often were outright wrong anyway. (It does little good to put a noreturn annotation where callers can't see it.) In any case, if we would like to believe that these macros can be used with non-gcc compilers, we should avoid gratuitous variance in usage patterns. I also went through and manually improved the formatting of a lot of declarations, and got rid of excessively repetitive (and now obsolete anyway) comments informing the reader what pg_attribute_printf is for. 2015-03-26 19:03:19 +01:00			`extern void replication_yyerror(const char *str) pg_attribute_noreturn();`
Split walsender.h in public/private headers This dramatically cuts short the number of headers the public one brings into whatever includes it. 2011-09-12 20:24:29 +02:00			`extern void replication_scanner_init(const char *query_string);`
			`extern void replication_scanner_finish(void);`

			`extern Node *replication_parse_result;`

Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us 2017-06-21 21:18:54 +02:00			`#endif /* _WALSENDER_PRIVATE_H */`