Reduce idle power consumption of walwriter and checkpointer processes.

This patch modifies the walwriter process so that, when it has not found
anything useful to do for many consecutive wakeup cycles, it extends its
sleep time to reduce the server's idle power consumption.  It reverts to
normal as soon as it has done any successful flush.  It's still true that,
during any async commit, backends check for completed, unflushed pages of
WAL and signal the walwriter if there are any, so in practice the
walwriter can be awakened and returned to normal operation sooner than the
sleep time might suggest.

Also, improve the checkpointer so that it uses a latch and a computed delay
time to not wake up at all except when it has something to do, replacing a
previous hardcoded 0.5 sec wakeup cycle.  This also is primarily useful for
reducing the server's power consumption when idle.

In passing, get rid of the dedicated latch for signaling the walwriter in
favor of using its procLatch, since that comports better with possible
generic signal handlers using that latch.  Also, fix a pre-existing bug
with failure to save/restore errno in walwriter's signal handlers.

Peter Geoghegan, somewhat simplified by Tom
This commit is contained in:
Tom Lane 2012-05-08 20:03:26 -04:00
parent 081ca7a0d1
commit 5461564a9d
7 changed files with 189 additions and 59 deletions

View File

@ -433,11 +433,6 @@ typedef struct XLogCtlData
*/
Latch recoveryWakeupLatch;
/*
* WALWriterLatch is used to wake up the WALWriter to write some WAL.
*/
Latch WALWriterLatch;
/*
* During recovery, we keep a copy of the latest checkpoint record here.
* Used by the background writer when it wants to create a restartpoint.
@ -1935,7 +1930,8 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
/*
* Nudge the WALWriter if we have a full page of WAL to write.
*/
SetLatch(&XLogCtl->WALWriterLatch);
if (ProcGlobal->walwriterLatch)
SetLatch(ProcGlobal->walwriterLatch);
}
/*
@ -2167,22 +2163,25 @@ XLogFlush(XLogRecPtr record)
* block, and flush through the latest one of those. Thus, if async commits
* are not being used, we will flush complete blocks only. We can guarantee
* that async commits reach disk after at most three cycles; normally only
* one or two. (We allow XLogWrite to write "flexibly", meaning it can stop
* at the end of the buffer ring; this makes a difference only with very high
* load or long wal_writer_delay, but imposes one extra cycle for the worst
* case for async commits.)
* one or two. (When flushing complete blocks, we allow XLogWrite to write
* "flexibly", meaning it can stop at the end of the buffer ring; this makes a
* difference only with very high load or long wal_writer_delay, but imposes
* one extra cycle for the worst case for async commits.)
*
* This routine is invoked periodically by the background walwriter process.
*
* Returns TRUE if we flushed anything.
*/
void
bool
XLogBackgroundFlush(void)
{
XLogRecPtr WriteRqstPtr;
bool flexible = true;
bool wrote_something = false;
/* XLOG doesn't need flushing during recovery */
if (RecoveryInProgress())
return;
return false;
/* read LogwrtResult and update local state */
{
@ -2224,7 +2223,7 @@ XLogBackgroundFlush(void)
XLogFileClose();
}
}
return;
return false;
}
#ifdef WAL_DEBUG
@ -2247,10 +2246,13 @@ XLogBackgroundFlush(void)
WriteRqst.Write = WriteRqstPtr;
WriteRqst.Flush = WriteRqstPtr;
XLogWrite(WriteRqst, flexible, false);
wrote_something = true;
}
LWLockRelease(WALWriteLock);
END_CRIT_SECTION();
return wrote_something;
}
/*
@ -5101,7 +5103,6 @@ XLOGShmemInit(void)
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck);
InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
InitSharedLatch(&XLogCtl->WALWriterLatch);
/*
* If we are not in bootstrap mode, pg_control should already exist. Read
@ -10478,12 +10479,3 @@ WakeupRecovery(void)
{
SetLatch(&XLogCtl->recoveryWakeupLatch);
}
/*
* Manage the WALWriterLatch
*/
Latch *
WALWriterLatch(void)
{
return &XLogCtl->WALWriterLatch;
}

View File

@ -51,6 +51,7 @@
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
#include "storage/spin.h"
@ -178,6 +179,7 @@ static void UpdateSharedMemoryConfig(void);
static void chkpt_quickdie(SIGNAL_ARGS);
static void ChkptSigHupHandler(SIGNAL_ARGS);
static void ReqCheckpointHandler(SIGNAL_ARGS);
static void chkpt_sigusr1_handler(SIGNAL_ARGS);
static void ReqShutdownHandler(SIGNAL_ARGS);
@ -224,7 +226,7 @@ CheckpointerMain(void)
pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
pqsignal(SIGUSR1, chkpt_sigusr1_handler);
pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */
/*
@ -359,6 +361,12 @@ CheckpointerMain(void)
*/
UpdateSharedMemoryConfig();
/*
* Advertise our latch that backends can use to wake us up while we're
* sleeping.
*/
ProcGlobal->checkpointerLatch = &MyProc->procLatch;
/*
* Loop forever
*/
@ -368,6 +376,10 @@ CheckpointerMain(void)
int flags = 0;
pg_time_t now;
int elapsed_secs;
int cur_timeout;
/* Clear any already-pending wakeups */
ResetLatch(&MyProc->procLatch);
/*
* Emergency bailout if postmaster has died. This is to avoid the
@ -387,15 +399,15 @@ CheckpointerMain(void)
ProcessConfigFile(PGC_SIGHUP);
/*
* Checkpointer is the last process to shutdown, so we ask
* Checkpointer is the last process to shut down, so we ask
* it to hold the keys for a range of other tasks required,
* most of which have nothing to do with checkpointing at all.
*
* For various reasons, some config values can change
* dynamically so are the primary copy of them is held in
* shared memory to make sure all backends see the same value.
* We make Checkpointer responsible for updating the shared
* memory copy if the parameter setting changes because of SIGHUP.
* For various reasons, some config values can change dynamically
* so the primary copy of them is held in shared memory to make
* sure all backends see the same value. We make Checkpointer
* responsible for updating the shared memory copy if the
* parameter setting changes because of SIGHUP.
*/
UpdateSharedMemoryConfig();
}
@ -488,7 +500,7 @@ CheckpointerMain(void)
errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
/*
* Initialize checkpointer-private variables used during checkpoint.
* Initialize checkpointer-private variables used during checkpoint
*/
ckpt_active = true;
if (!do_restartpoint)
@ -543,20 +555,34 @@ CheckpointerMain(void)
ckpt_active = false;
}
/* Check for archive_timeout and switch xlog files if necessary. */
CheckArchiveTimeout();
/*
* Send off activity statistics to the stats collector
*/
pgstat_send_bgwriter();
/*
* Nap for a while and then loop again. Later patches will replace
* this with a latch loop. Keep it simple now for clarity.
* Relatively long sleep because the bgwriter does cleanup now.
* Sleep until we are signaled or it's time for another checkpoint
* or xlog file switch.
*/
pg_usleep(500000L);
now = (pg_time_t) time(NULL);
elapsed_secs = now - last_checkpoint_time;
if (elapsed_secs >= CheckPointTimeout)
continue; /* no sleep for us ... */
cur_timeout = CheckPointTimeout - elapsed_secs;
if (XLogArchiveTimeout > 0 && !RecoveryInProgress())
{
elapsed_secs = now - last_xlog_switch_time;
if (elapsed_secs >= XLogArchiveTimeout)
continue; /* no sleep for us ... */
cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs);
}
/* Check for archive_timeout and switch xlog files if necessary. */
CheckArchiveTimeout();
(void) WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
cur_timeout * 1000L /* convert to ms */);
}
}
@ -814,21 +840,50 @@ chkpt_quickdie(SIGNAL_ARGS)
/*
 * Signal handler that records a pending request in got_SIGHUP (by its name
 * this is the SIGHUP handler; the pqsignal() registration is not visible
 * here -- confirm).
 *
 * After setting the flag it sets the process latch, which is the latch the
 * checkpointer main loop waits on in WaitLatch, so a sleeping loop is woken
 * instead of running out its timeout.
 */
static void
ChkptSigHupHandler(SIGNAL_ARGS)
{
/* Save errno so the handler leaves the interrupted code's value intact */
int save_errno = errno;
got_SIGHUP = true;
/* Only touch the latch if MyProc is set (presumably it can be NULL early in startup) */
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/*
 * SIGINT: set flag to run a normal checkpoint right away.
 *
 * Besides setting checkpoint_requested, the handler sets the process latch
 * (when MyProc is non-NULL) so that a WaitLatch on that latch returns rather
 * than sleeping out its timeout.  errno is saved on entry and restored on
 * exit so the interrupted code does not see a changed value.
 */
static void
ReqCheckpointHandler(SIGNAL_ARGS)
{
/* Preserve the interrupted code's errno across the handler */
int save_errno = errno;
checkpoint_requested = true;
/* Wake the main loop, but only if our PGPROC is available */
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/*
 * SIGUSR1: used for latch wakeups.
 *
 * Installed for SIGUSR1 in CheckpointerMain.  The handler only delegates to
 * latch_sigusr1_handler() (the latch module's hook; its internals are not
 * shown here) while saving and restoring errno around the call.
 */
static void
chkpt_sigusr1_handler(SIGNAL_ARGS)
{
/* Preserve the interrupted code's errno across the handler */
int save_errno = errno;
latch_sigusr1_handler();
errno = save_errno;
}
/*
 * SIGUSR2: set flag to run a shutdown checkpoint and exit.
 *
 * Installed for SIGUSR2 in CheckpointerMain.  The handler itself only sets
 * shutdown_requested and then sets the process latch (when MyProc is
 * non-NULL) so that a WaitLatch on that latch returns promptly; the flag is
 * acted on outside the handler.  errno is saved on entry and restored on
 * exit.
 */
static void
ReqShutdownHandler(SIGNAL_ARGS)
{
/* Preserve the interrupted code's errno across the handler */
int save_errno = errno;
shutdown_requested = true;
/* Wake the main loop, but only if our PGPROC is available */
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
@ -1055,6 +1110,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
BlockNumber segno)
{
BgWriterRequest *request;
bool too_full;
if (!IsUnderPostmaster)
return false; /* probably shouldn't even get here */
@ -1068,14 +1124,13 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
BgWriterShmem->num_backend_writes++;
/*
* If the background writer isn't running or the request queue is full,
* If the checkpointer isn't running or the request queue is full,
* the backend will have to perform its own fsync request. But before
* forcing that to happen, we can try to compact the background writer
* request queue.
* forcing that to happen, we can try to compact the request queue.
*/
if (BgWriterShmem->checkpointer_pid == 0 ||
(BgWriterShmem->num_requests >= BgWriterShmem->max_requests
&& !CompactCheckpointerRequestQueue()))
(BgWriterShmem->num_requests >= BgWriterShmem->max_requests &&
!CompactCheckpointerRequestQueue()))
{
/*
* Count the subset of writes where backends have to do their own
@ -1085,11 +1140,23 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
LWLockRelease(BgWriterCommLock);
return false;
}
/* OK, insert request */
request = &BgWriterShmem->requests[BgWriterShmem->num_requests++];
request->rnode = rnode;
request->forknum = forknum;
request->segno = segno;
/* If queue is at least half full, nudge the checkpointer to empty it */
too_full = (BgWriterShmem->num_requests >=
BgWriterShmem->max_requests / 2);
LWLockRelease(BgWriterCommLock);
/* ... but not till after we release the lock */
if (too_full && ProcGlobal->checkpointerLatch)
SetLatch(ProcGlobal->checkpointerLatch);
return true;
}
@ -1109,7 +1176,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum,
* practice: there's one queue entry per shared buffer.
*/
static bool
CompactCheckpointerRequestQueue()
CompactCheckpointerRequestQueue(void)
{
struct BgWriterSlotMapping
{
@ -1230,7 +1297,7 @@ AbsorbFsyncRequests(void)
*/
LWLockAcquire(BgWriterCommLock, LW_EXCLUSIVE);
/* Transfer write count into pending pgstats message */
/* Transfer stats counts into pending pgstats message */
BgWriterStats.m_buf_written_backend += BgWriterShmem->num_backend_writes;
BgWriterStats.m_buf_fsync_backend += BgWriterShmem->num_backend_fsync;

View File

@ -54,6 +54,7 @@
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/hsearch.h"
@ -66,6 +67,14 @@
*/
int WalWriterDelay = 200;
/*
* Number of do-nothing loops before lengthening the delay time, and the
* multiplier to apply to WalWriterDelay when we do decide to hibernate.
* (Perhaps these need to be configurable?)
*/
#define LOOPS_UNTIL_HIBERNATE 50
#define HIBERNATE_FACTOR 25
/*
* Flags set by interrupt handlers for later service in the main loop.
*/
@ -76,6 +85,7 @@ static volatile sig_atomic_t shutdown_requested = false;
static void wal_quickdie(SIGNAL_ARGS);
static void WalSigHupHandler(SIGNAL_ARGS);
static void WalShutdownHandler(SIGNAL_ARGS);
static void walwriter_sigusr1_handler(SIGNAL_ARGS);
/*
* Main entry point for walwriter process
@ -88,8 +98,7 @@ WalWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext walwriter_context;
InitLatch(WALWriterLatch()); /* initialize latch used in main loop */
int left_till_hibernate;
/*
* If possible, make this process a group leader, so that the postmaster
@ -114,7 +123,7 @@ WalWriterMain(void)
pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */
pqsignal(SIGUSR1, walwriter_sigusr1_handler);
pqsignal(SIGUSR2, SIG_IGN); /* not used */
/*
@ -217,12 +226,26 @@ WalWriterMain(void)
*/
PG_SETMASK(&UnBlockSig);
/*
* Reset hibernation state after any error.
*/
left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
/*
* Advertise our latch that backends can use to wake us up while we're
* sleeping.
*/
ProcGlobal->walwriterLatch = &MyProc->procLatch;
/*
* Loop forever
*/
for (;;)
{
ResetLatch(WALWriterLatch());
long cur_timeout;
/* Clear any already-pending wakeups */
ResetLatch(&MyProc->procLatch);
/*
* Emergency bailout if postmaster has died. This is to avoid the
@ -246,13 +269,27 @@ WalWriterMain(void)
}
/*
* Do what we're here for...
* Do what we're here for; then, if XLogBackgroundFlush() found useful
* work to do, reset hibernation counter.
*/
XLogBackgroundFlush();
if (XLogBackgroundFlush())
left_till_hibernate = LOOPS_UNTIL_HIBERNATE;
else if (left_till_hibernate > 0)
left_till_hibernate--;
(void) WaitLatch(WALWriterLatch(),
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
WalWriterDelay /* ms */);
/*
* Sleep until we are signaled or WalWriterDelay has elapsed. If we
* haven't done anything useful for quite some time, lengthen the
* sleep time so as to reduce the server's idle power consumption.
*/
if (left_till_hibernate > 0)
cur_timeout = WalWriterDelay; /* in ms */
else
cur_timeout = WalWriterDelay * HIBERNATE_FACTOR;
(void) WaitLatch(&MyProc->procLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
cur_timeout);
}
}
@ -298,14 +335,35 @@ wal_quickdie(SIGNAL_ARGS)
static void
WalSigHupHandler(SIGNAL_ARGS)
{
int save_errno = errno;
got_SIGHUP = true;
SetLatch(WALWriterLatch());
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/* SIGTERM: set flag to exit normally */
static void
WalShutdownHandler(SIGNAL_ARGS)
{
int save_errno = errno;
shutdown_requested = true;
SetLatch(WALWriterLatch());
if (MyProc)
SetLatch(&MyProc->procLatch);
errno = save_errno;
}
/*
 * SIGUSR1: used for latch wakeups.
 *
 * Installed for SIGUSR1 in WalWriterMain.  The handler only delegates to
 * latch_sigusr1_handler() (the latch module's hook; its internals are not
 * shown here) while saving and restoring errno around the call.
 */
static void
walwriter_sigusr1_handler(SIGNAL_ARGS)
{
/* Preserve the interrupted code's errno across the handler */
int save_errno = errno;
latch_sigusr1_handler();
errno = save_errno;
}

View File

@ -187,6 +187,8 @@ InitProcGlobal(void)
ProcGlobal->startupProcPid = 0;
ProcGlobal->startupBufferPinWaitBufId = -1;
ProcGlobal->bgwriterLatch = NULL;
ProcGlobal->walwriterLatch = NULL;
ProcGlobal->checkpointerLatch = NULL;
/*
* Create and initialize all the PGPROC structures we'll need (except for

View File

@ -16,7 +16,6 @@
#include "datatype/timestamp.h"
#include "lib/stringinfo.h"
#include "storage/buf.h"
#include "storage/latch.h"
#include "utils/pg_crc.h"
/*
@ -266,7 +265,7 @@ extern CheckpointStatsData CheckpointStats;
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr);
extern void XLogBackgroundFlush(void);
extern bool XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern int XLogFileInit(uint32 log, uint32 seg,
bool *use_existent, bool use_lock);
@ -317,7 +316,6 @@ extern TimeLineID GetRecoveryTargetTLI(void);
extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
extern Latch *WALWriterLatch(void);
/*
* Starting/stopping a base backup

View File

@ -64,6 +64,15 @@
* will be lifted in future by inserting suitable memory barriers into
* SetLatch and ResetLatch.
*
* Note that use of the process latch (PGPROC.procLatch) is generally better
* than an ad-hoc shared latch for signaling auxiliary processes. This is
* because generic signal handlers will call SetLatch on the process latch
* only, so using any latch other than the process latch effectively precludes
* ever registering a generic handler. Since signals have the potential to
* invalidate the latch timeout on some platforms, resulting in a
* denial-of-service, it is important to verify that all signal handlers
* within all WaitLatch-calling processes call SetLatch.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California

View File

@ -188,8 +188,12 @@ typedef struct PROC_HDR
PGPROC *freeProcs;
/* Head of list of autovacuum's free PGPROC structures */
PGPROC *autovacFreeProcs;
/* BGWriter process latch */
/* BGWriter process's latch */
Latch *bgwriterLatch;
/* WALWriter process's latch */
Latch *walwriterLatch;
/* Checkpointer process's latch */
Latch *checkpointerLatch;
/* Current shared estimate of appropriate spins_per_delay value */
int spins_per_delay;
/* The proc of the Startup process, since not in ProcArray */