/*-------------------------------------------------------------------------
 *
 * wait_event.h
 *	  Definitions related to wait event reporting
 *
 * Copyright (c) 2001-2023, PostgreSQL Global Development Group
 *
 * src/include/utils/wait_event.h
 *
 * ----------
 */
|
|
|
|
#ifndef WAIT_EVENT_H
#define WAIT_EVENT_H

/* ----------
 * Wait Classes
 *
 * A wait event is reported as a single uint32: the high byte identifies the
 * wait class (one of the values below) and the remaining bytes identify the
 * specific event within that class (see pgstat_report_wait_start() below for
 * the exact layout).  The enums later in this file anchor their first member
 * to one of these class values.
 *
 * NOTE(review): 0x02000000U is intentionally skipped here — confirm against
 * project history before reusing that value for a new class.
 * ----------
 */
#define PG_WAIT_LWLOCK			0x01000000U
#define PG_WAIT_LOCK			0x03000000U
#define PG_WAIT_BUFFER_PIN		0x04000000U
#define PG_WAIT_ACTIVITY		0x05000000U
#define PG_WAIT_CLIENT			0x06000000U
#define PG_WAIT_EXTENSION		0x07000000U
#define PG_WAIT_IPC				0x08000000U
#define PG_WAIT_TIMEOUT			0x09000000U
#define PG_WAIT_IO				0x0A000000U
|
|
|
|
|
|
|
|
/* ----------
 * Wait Events - Activity
 *
 * Use this category when a process is waiting because it has no work to do,
 * unless the "Client" or "Timeout" category describes the situation better.
 * Typically, this should only be used for background processes.
 * ----------
 */
typedef enum
{
	/* First member is pinned to the class value; the rest follow consecutively. */
	WAIT_EVENT_ARCHIVER_MAIN = PG_WAIT_ACTIVITY,
	WAIT_EVENT_AUTOVACUUM_MAIN,
	WAIT_EVENT_BGWRITER_HIBERNATE,
	WAIT_EVENT_BGWRITER_MAIN,
	WAIT_EVENT_CHECKPOINTER_MAIN,
	WAIT_EVENT_LOGICAL_APPLY_MAIN,
	WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
	WAIT_EVENT_LOGICAL_PARALLEL_APPLY_MAIN,
	WAIT_EVENT_RECOVERY_WAL_STREAM,
	WAIT_EVENT_SYSLOGGER_MAIN,
	WAIT_EVENT_WAL_RECEIVER_MAIN,
	WAIT_EVENT_WAL_SENDER_MAIN,
	WAIT_EVENT_WAL_WRITER_MAIN
} WaitEventActivity;
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* Wait Events - Client
|
|
|
|
*
|
|
|
|
* Use this category when a process is waiting to send data to or receive data
|
|
|
|
* from the frontend process to which it is connected. This is never used for
|
|
|
|
* a background process, which has no client connection.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
typedef enum
|
|
|
|
{
|
|
|
|
WAIT_EVENT_CLIENT_READ = PG_WAIT_CLIENT,
|
|
|
|
WAIT_EVENT_CLIENT_WRITE,
|
|
|
|
WAIT_EVENT_GSS_OPEN_SERVER,
|
|
|
|
WAIT_EVENT_LIBPQWALRECEIVER_CONNECT,
|
|
|
|
WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE,
|
|
|
|
WAIT_EVENT_SSL_OPEN_SERVER,
|
|
|
|
WAIT_EVENT_WAL_SENDER_WAIT_WAL,
|
|
|
|
WAIT_EVENT_WAL_SENDER_WRITE_DATA,
|
|
|
|
} WaitEventClient;
|
|
|
|
|
|
|
|
/* ----------
 * Wait Events - IPC
 *
 * Use this category when a process cannot complete the work it is doing because
 * it is waiting for a notification from another process.
 * ----------
 */
typedef enum
{
	/* First member is pinned to the class value; the rest follow consecutively. */
	WAIT_EVENT_APPEND_READY = PG_WAIT_IPC,
	WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND,
	WAIT_EVENT_ARCHIVE_COMMAND,
	WAIT_EVENT_BACKEND_TERMINATION,
	WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE,
	WAIT_EVENT_BGWORKER_SHUTDOWN,
	WAIT_EVENT_BGWORKER_STARTUP,
	WAIT_EVENT_BTREE_PAGE,
	WAIT_EVENT_BUFFER_IO,
	WAIT_EVENT_CHECKPOINT_DONE,
	WAIT_EVENT_CHECKPOINT_START,
	WAIT_EVENT_EXECUTE_GATHER,
	WAIT_EVENT_HASH_BATCH_ALLOCATE,
	WAIT_EVENT_HASH_BATCH_ELECT,
	WAIT_EVENT_HASH_BATCH_LOAD,
	WAIT_EVENT_HASH_BUILD_ALLOCATE,
	WAIT_EVENT_HASH_BUILD_ELECT,
	WAIT_EVENT_HASH_BUILD_HASH_INNER,
	WAIT_EVENT_HASH_BUILD_HASH_OUTER,
	WAIT_EVENT_HASH_GROW_BATCHES_DECIDE,
	WAIT_EVENT_HASH_GROW_BATCHES_ELECT,
	WAIT_EVENT_HASH_GROW_BATCHES_FINISH,
	WAIT_EVENT_HASH_GROW_BATCHES_REALLOCATE,
	WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION,
	WAIT_EVENT_HASH_GROW_BUCKETS_ELECT,
	WAIT_EVENT_HASH_GROW_BUCKETS_REALLOCATE,
	WAIT_EVENT_HASH_GROW_BUCKETS_REINSERT,
	WAIT_EVENT_LOGICAL_APPLY_SEND_DATA,
	WAIT_EVENT_LOGICAL_PARALLEL_APPLY_STATE_CHANGE,
	WAIT_EVENT_LOGICAL_SYNC_DATA,
	WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE,
	WAIT_EVENT_MQ_INTERNAL,
	WAIT_EVENT_MQ_PUT_MESSAGE,
	WAIT_EVENT_MQ_RECEIVE,
	WAIT_EVENT_MQ_SEND,
	WAIT_EVENT_PARALLEL_BITMAP_SCAN,
	WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN,
	WAIT_EVENT_PARALLEL_FINISH,
	WAIT_EVENT_PROCARRAY_GROUP_UPDATE,
	WAIT_EVENT_PROC_SIGNAL_BARRIER,
	WAIT_EVENT_PROMOTE,
	WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT,
	WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE,
	WAIT_EVENT_RECOVERY_END_COMMAND,
	WAIT_EVENT_RECOVERY_PAUSE,
	WAIT_EVENT_REPLICATION_ORIGIN_DROP,
	WAIT_EVENT_REPLICATION_SLOT_DROP,
	WAIT_EVENT_RESTORE_COMMAND,
	WAIT_EVENT_SAFE_SNAPSHOT,
	WAIT_EVENT_SYNC_REP,
	WAIT_EVENT_WAL_RECEIVER_EXIT,
	WAIT_EVENT_WAL_RECEIVER_WAIT_START,
	WAIT_EVENT_XACT_GROUP_UPDATE
} WaitEventIPC;
|
|
|
|
|
|
|
|
/* ----------
 * Wait Events - Timeout
 *
 * Use this category when a process is waiting for a timeout to expire.
 * ----------
 */
typedef enum
{
	/* First member is pinned to the class value; the rest follow consecutively. */
	WAIT_EVENT_BASE_BACKUP_THROTTLE = PG_WAIT_TIMEOUT,
	WAIT_EVENT_CHECKPOINT_WRITE_DELAY,
	WAIT_EVENT_PG_SLEEP,
	WAIT_EVENT_RECOVERY_APPLY_DELAY,
	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
	WAIT_EVENT_REGISTER_SYNC_REQUEST,
	WAIT_EVENT_SPIN_DELAY,
	WAIT_EVENT_VACUUM_DELAY,
	WAIT_EVENT_VACUUM_TRUNCATE
} WaitEventTimeout;
|
|
|
|
|
|
|
|
/* ----------
 * Wait Events - IO
 *
 * Use this category when a process is waiting for a IO.
 * ----------
 */
typedef enum
{
	/* First member is pinned to the class value; the rest follow consecutively. */
	WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO,
	WAIT_EVENT_BASEBACKUP_SYNC,
	WAIT_EVENT_BASEBACKUP_WRITE,
	WAIT_EVENT_BUFFILE_READ,
	WAIT_EVENT_BUFFILE_WRITE,
	WAIT_EVENT_BUFFILE_TRUNCATE,
	WAIT_EVENT_CONTROL_FILE_READ,
	WAIT_EVENT_CONTROL_FILE_SYNC,
	WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE,
	WAIT_EVENT_CONTROL_FILE_WRITE,
	WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE,
	WAIT_EVENT_COPY_FILE_READ,
	WAIT_EVENT_COPY_FILE_WRITE,
	WAIT_EVENT_DATA_FILE_EXTEND,
	WAIT_EVENT_DATA_FILE_FLUSH,
	WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC,
	WAIT_EVENT_DATA_FILE_PREFETCH,
	WAIT_EVENT_DATA_FILE_READ,
	WAIT_EVENT_DATA_FILE_SYNC,
	WAIT_EVENT_DATA_FILE_TRUNCATE,
	WAIT_EVENT_DATA_FILE_WRITE,
	WAIT_EVENT_DSM_ALLOCATE,
	WAIT_EVENT_DSM_FILL_ZERO_WRITE,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC,
	WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE,
	WAIT_EVENT_LOCK_FILE_CREATE_READ,
	WAIT_EVENT_LOCK_FILE_CREATE_SYNC,
	WAIT_EVENT_LOCK_FILE_CREATE_WRITE,
	WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ,
	WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE,
	WAIT_EVENT_LOGICAL_REWRITE_SYNC,
	WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE,
	WAIT_EVENT_LOGICAL_REWRITE_WRITE,
	WAIT_EVENT_RELATION_MAP_READ,
	WAIT_EVENT_RELATION_MAP_REPLACE,
	WAIT_EVENT_RELATION_MAP_WRITE,
	WAIT_EVENT_REORDER_BUFFER_READ,
	WAIT_EVENT_REORDER_BUFFER_WRITE,
	WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ,
	WAIT_EVENT_REPLICATION_SLOT_READ,
	WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_SYNC,
	WAIT_EVENT_REPLICATION_SLOT_WRITE,
	WAIT_EVENT_SLRU_FLUSH_SYNC,
	WAIT_EVENT_SLRU_READ,
	WAIT_EVENT_SLRU_SYNC,
	WAIT_EVENT_SLRU_WRITE,
	WAIT_EVENT_SNAPBUILD_READ,
	WAIT_EVENT_SNAPBUILD_SYNC,
	WAIT_EVENT_SNAPBUILD_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE,
	WAIT_EVENT_TIMELINE_HISTORY_READ,
	WAIT_EVENT_TIMELINE_HISTORY_SYNC,
	WAIT_EVENT_TIMELINE_HISTORY_WRITE,
	WAIT_EVENT_TWOPHASE_FILE_READ,
	WAIT_EVENT_TWOPHASE_FILE_SYNC,
	WAIT_EVENT_TWOPHASE_FILE_WRITE,
	WAIT_EVENT_VERSION_FILE_WRITE,
	WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ,
	WAIT_EVENT_WAL_BOOTSTRAP_SYNC,
	WAIT_EVENT_WAL_BOOTSTRAP_WRITE,
	WAIT_EVENT_WAL_COPY_READ,
	WAIT_EVENT_WAL_COPY_SYNC,
	WAIT_EVENT_WAL_COPY_WRITE,
	WAIT_EVENT_WAL_INIT_SYNC,
	WAIT_EVENT_WAL_INIT_WRITE,
	WAIT_EVENT_WAL_READ,
	WAIT_EVENT_WAL_SYNC,
	WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN,
	WAIT_EVENT_WAL_WRITE
} WaitEventIO;
|
|
|
|
|
|
|
|
|
|
|
|
/* Map a wait_event_info value to its event name / class name. */
extern const char *pgstat_get_wait_event(uint32 wait_event_info);
extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);

/* Report start/end of a wait; inlined for speed (definitions below). */
static inline void pgstat_report_wait_start(uint32 wait_event_info);
static inline void pgstat_report_wait_end(void);

/*
 * Redirect wait-event reporting to (or away from) shared storage; used during
 * process startup/shutdown so reporting works before MyProc exists.
 */
extern void pgstat_set_wait_event_storage(uint32 *wait_event_info);
extern void pgstat_reset_wait_event_storage(void);

/* Where the current process's wait event is stored; never NULL. */
extern PGDLLIMPORT uint32 *my_wait_event_info;
|
2021-04-03 04:45:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_report_wait_start() -
 *
 * Called from places where server process needs to wait.  This is called
 * to report wait event information.  The wait information is stored
 * as 4-bytes where first byte represents the wait event class (type of
 * wait, for different types of wait, refer WaitClass) and the next
 * 3-bytes represent the actual wait event.  Currently 2-bytes are used
 * for wait event which is sufficient for current usage, 1-byte is
 * reserved for future usage.
 *
 * Historically we used to make this reporting conditional on
 * pgstat_track_activities, but the check for that seems to add more cost
 * than it saves.
 *
 * my_wait_event_info initially points to local memory, making it safe to
 * call this before MyProc has been initialized.
 * ----------
 */
static inline void
pgstat_report_wait_start(uint32 wait_event_info)
{
	/*
	 * Since this is a four-byte field which is always read and written as
	 * four-bytes, updates are atomic.  The volatile cast forces a single
	 * store and prevents the compiler from caching or splitting it.
	 */
	*(volatile uint32 *) my_wait_event_info = wait_event_info;
}
|
|
|
|
|
|
|
|
/* ----------
 * pgstat_report_wait_end() -
 *
 * Called to report end of a wait.  Clears the stored wait event; must pair
 * with a preceding pgstat_report_wait_start().
 * ----------
 */
static inline void
pgstat_report_wait_end(void)
{
	/* see pgstat_report_wait_start() for the atomicity rationale */
	*(volatile uint32 *) my_wait_event_info = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* WAIT_EVENT_H */
|