Use condition variables to wait for checkpoints.
Previously we used a polling/sleeping loop to wait for checkpoints to begin and end, which led to up to a couple hundred milliseconds of needless thumb-twiddling. Use condition variables instead.
Author: Thomas Munro
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/CA%2BhUKGLY7sDe%2Bbg1K%3DbnEzOofGoo4bJHYh9%2BcDCXJepb6DQmLw%40mail.gmail.com
This commit is contained in:
parent
5655565c07
commit
c6c9474aaf
|
@ -1281,7 +1281,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
||||||
<entry>Waiting in an extension.</entry>
|
<entry>Waiting in an extension.</entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry morerows="34"><literal>IPC</literal></entry>
|
<entry morerows="36"><literal>IPC</literal></entry>
|
||||||
<entry><literal>BgWorkerShutdown</literal></entry>
|
<entry><literal>BgWorkerShutdown</literal></entry>
|
||||||
<entry>Waiting for background worker to shut down.</entry>
|
<entry>Waiting for background worker to shut down.</entry>
|
||||||
</row>
|
</row>
|
||||||
|
@ -1293,6 +1293,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
||||||
<entry><literal>BtreePage</literal></entry>
|
<entry><literal>BtreePage</literal></entry>
|
||||||
<entry>Waiting for the page number needed to continue a parallel B-tree scan to become available.</entry>
|
<entry>Waiting for the page number needed to continue a parallel B-tree scan to become available.</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>CheckpointDone</literal></entry>
|
||||||
|
<entry>Waiting for a checkpoint to complete.</entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>CheckpointStart</literal></entry>
|
||||||
|
<entry>Waiting for a checkpoint to start.</entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry><literal>ClogGroupUpdate</literal></entry>
|
<entry><literal>ClogGroupUpdate</literal></entry>
|
||||||
<entry>Waiting for group leader to update transaction status at transaction end.</entry>
|
<entry>Waiting for group leader to update transaction status at transaction end.</entry>
|
||||||
|
|
|
@ -126,6 +126,9 @@ typedef struct
|
||||||
|
|
||||||
int ckpt_flags; /* checkpoint flags, as defined in xlog.h */
|
int ckpt_flags; /* checkpoint flags, as defined in xlog.h */
|
||||||
|
|
||||||
|
ConditionVariable start_cv; /* signaled when ckpt_started advances */
|
||||||
|
ConditionVariable done_cv; /* signaled when ckpt_done advances */
|
||||||
|
|
||||||
uint32 num_backend_writes; /* counts user backend buffer writes */
|
uint32 num_backend_writes; /* counts user backend buffer writes */
|
||||||
uint32 num_backend_fsync; /* counts user backend fsync calls */
|
uint32 num_backend_fsync; /* counts user backend fsync calls */
|
||||||
|
|
||||||
|
@ -428,6 +431,8 @@ CheckpointerMain(void)
|
||||||
CheckpointerShmem->ckpt_started++;
|
CheckpointerShmem->ckpt_started++;
|
||||||
SpinLockRelease(&CheckpointerShmem->ckpt_lck);
|
SpinLockRelease(&CheckpointerShmem->ckpt_lck);
|
||||||
|
|
||||||
|
ConditionVariableBroadcast(&CheckpointerShmem->start_cv);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The end-of-recovery checkpoint is a real checkpoint that's
|
* The end-of-recovery checkpoint is a real checkpoint that's
|
||||||
* performed while we're still in recovery.
|
* performed while we're still in recovery.
|
||||||
|
@ -488,6 +493,8 @@ CheckpointerMain(void)
|
||||||
CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
|
CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
|
||||||
SpinLockRelease(&CheckpointerShmem->ckpt_lck);
|
SpinLockRelease(&CheckpointerShmem->ckpt_lck);
|
||||||
|
|
||||||
|
ConditionVariableBroadcast(&CheckpointerShmem->done_cv);
|
||||||
|
|
||||||
if (ckpt_performed)
|
if (ckpt_performed)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -915,6 +922,8 @@ CheckpointerShmemInit(void)
|
||||||
MemSet(CheckpointerShmem, 0, size);
|
MemSet(CheckpointerShmem, 0, size);
|
||||||
SpinLockInit(&CheckpointerShmem->ckpt_lck);
|
SpinLockInit(&CheckpointerShmem->ckpt_lck);
|
||||||
CheckpointerShmem->max_requests = NBuffers;
|
CheckpointerShmem->max_requests = NBuffers;
|
||||||
|
ConditionVariableInit(&CheckpointerShmem->start_cv);
|
||||||
|
ConditionVariableInit(&CheckpointerShmem->done_cv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1023,6 +1032,7 @@ RequestCheckpoint(int flags)
|
||||||
new_failed;
|
new_failed;
|
||||||
|
|
||||||
/* Wait for a new checkpoint to start. */
|
/* Wait for a new checkpoint to start. */
|
||||||
|
ConditionVariablePrepareToSleep(&CheckpointerShmem->start_cv);
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
|
SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
|
||||||
|
@ -1032,13 +1042,15 @@ RequestCheckpoint(int flags)
|
||||||
if (new_started != old_started)
|
if (new_started != old_started)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
ConditionVariableSleep(&CheckpointerShmem->start_cv,
|
||||||
pg_usleep(100000L);
|
WAIT_EVENT_CHECKPOINT_START);
|
||||||
}
|
}
|
||||||
|
ConditionVariableCancelSleep();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are waiting for ckpt_done >= new_started, in a modulo sense.
|
* We are waiting for ckpt_done >= new_started, in a modulo sense.
|
||||||
*/
|
*/
|
||||||
|
ConditionVariablePrepareToSleep(&CheckpointerShmem->done_cv);
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
int new_done;
|
int new_done;
|
||||||
|
@ -1051,9 +1063,10 @@ RequestCheckpoint(int flags)
|
||||||
if (new_done - new_started >= 0)
|
if (new_done - new_started >= 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
CHECK_FOR_INTERRUPTS();
|
ConditionVariableSleep(&CheckpointerShmem->done_cv,
|
||||||
pg_usleep(100000L);
|
WAIT_EVENT_CHECKPOINT_DONE);
|
||||||
}
|
}
|
||||||
|
ConditionVariableCancelSleep();
|
||||||
|
|
||||||
if (new_failed != old_failed)
|
if (new_failed != old_failed)
|
||||||
ereport(ERROR,
|
ereport(ERROR,
|
||||||
|
|
|
@ -3623,6 +3623,12 @@ pgstat_get_wait_ipc(WaitEventIPC w)
|
||||||
case WAIT_EVENT_BTREE_PAGE:
|
case WAIT_EVENT_BTREE_PAGE:
|
||||||
event_name = "BtreePage";
|
event_name = "BtreePage";
|
||||||
break;
|
break;
|
||||||
|
case WAIT_EVENT_CHECKPOINT_DONE:
|
||||||
|
event_name = "CheckpointDone";
|
||||||
|
break;
|
||||||
|
case WAIT_EVENT_CHECKPOINT_START:
|
||||||
|
event_name = "CheckpointStart";
|
||||||
|
break;
|
||||||
case WAIT_EVENT_CLOG_GROUP_UPDATE:
|
case WAIT_EVENT_CLOG_GROUP_UPDATE:
|
||||||
event_name = "ClogGroupUpdate";
|
event_name = "ClogGroupUpdate";
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -817,6 +817,8 @@ typedef enum
|
||||||
WAIT_EVENT_BGWORKER_STARTUP,
|
WAIT_EVENT_BGWORKER_STARTUP,
|
||||||
WAIT_EVENT_BTREE_PAGE,
|
WAIT_EVENT_BTREE_PAGE,
|
||||||
WAIT_EVENT_CLOG_GROUP_UPDATE,
|
WAIT_EVENT_CLOG_GROUP_UPDATE,
|
||||||
|
WAIT_EVENT_CHECKPOINT_DONE,
|
||||||
|
WAIT_EVENT_CHECKPOINT_START,
|
||||||
WAIT_EVENT_EXECUTE_GATHER,
|
WAIT_EVENT_EXECUTE_GATHER,
|
||||||
WAIT_EVENT_HASH_BATCH_ALLOCATING,
|
WAIT_EVENT_HASH_BATCH_ALLOCATING,
|
||||||
WAIT_EVENT_HASH_BATCH_ELECTING,
|
WAIT_EVENT_HASH_BATCH_ELECTING,
|
||||||
|
|
Loading…
Reference in New Issue