diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b584cb0d0b..6d3a4cd3df 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -433,11 +433,6 @@ typedef struct XLogCtlData */ Latch recoveryWakeupLatch; - /* - * WALWriterLatch is used to wake up the WALWriter to write some WAL. - */ - Latch WALWriterLatch; - /* * During recovery, we keep a copy of the latest checkpoint record here. * Used by the background writer when it wants to create a restartpoint. @@ -1935,7 +1930,8 @@ XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN) /* * Nudge the WALWriter if we have a full page of WAL to write. */ - SetLatch(&XLogCtl->WALWriterLatch); + if (ProcGlobal->walwriterLatch) + SetLatch(ProcGlobal->walwriterLatch); } /* @@ -2167,22 +2163,25 @@ XLogFlush(XLogRecPtr record) * block, and flush through the latest one of those. Thus, if async commits * are not being used, we will flush complete blocks only. We can guarantee * that async commits reach disk after at most three cycles; normally only - * one or two. (We allow XLogWrite to write "flexibly", meaning it can stop - * at the end of the buffer ring; this makes a difference only with very high - * load or long wal_writer_delay, but imposes one extra cycle for the worst - * case for async commits.) + * one or two. (When flushing complete blocks, we allow XLogWrite to write + * "flexibly", meaning it can stop at the end of the buffer ring; this makes a + * difference only with very high load or long wal_writer_delay, but imposes + * one extra cycle for the worst case for async commits.) * * This routine is invoked periodically by the background walwriter process. + * + * Returns TRUE if we flushed anything. */ -void +bool XLogBackgroundFlush(void) { XLogRecPtr WriteRqstPtr; bool flexible = true; + bool wrote_something = false; /* XLOG doesn't need flushing during recovery */ if (RecoveryInProgress()) - return; + return false; /* read LogwrtResult and update local state */ { @@ -2224,7 +2223,7 @@ XLogBackgroundFlush(void) XLogFileClose(); } } - return; + return false; } #ifdef WAL_DEBUG @@ -2247,10 +2246,13 @@ XLogBackgroundFlush(void) WriteRqst.Write = WriteRqstPtr; WriteRqst.Flush = WriteRqstPtr; XLogWrite(WriteRqst, flexible, false); + wrote_something = true; } LWLockRelease(WALWriteLock); END_CRIT_SECTION(); + + return wrote_something; } /* @@ -5101,7 +5103,6 @@ XLOGShmemInit(void) XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages); SpinLockInit(&XLogCtl->info_lck); InitSharedLatch(&XLogCtl->recoveryWakeupLatch); - InitSharedLatch(&XLogCtl->WALWriterLatch); /* * If we are not in bootstrap mode, pg_control should already exist. Read @@ -10478,12 +10479,3 @@ WakeupRecovery(void) { SetLatch(&XLogCtl->recoveryWakeupLatch); } - -/* - * Manage the WALWriterLatch - */ -Latch * -WALWriterLatch(void) -{ - return &XLogCtl->WALWriterLatch; -} diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 2329b1a9a9..2731eb8f24 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -51,6 +51,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" +#include "storage/proc.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" @@ -178,6 +179,7 @@ static void UpdateSharedMemoryConfig(void); static void chkpt_quickdie(SIGNAL_ARGS); static void ChkptSigHupHandler(SIGNAL_ARGS); static void ReqCheckpointHandler(SIGNAL_ARGS); +static void chkpt_sigusr1_handler(SIGNAL_ARGS); static void ReqShutdownHandler(SIGNAL_ARGS); @@ -224,7 +226,7 @@ CheckpointerMain(void) pqsignal(SIGQUIT, chkpt_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ + pqsignal(SIGUSR1, chkpt_sigusr1_handler); pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */ /* @@ -359,6 +361,12 @@ CheckpointerMain(void) */ UpdateSharedMemoryConfig(); + /* + * Advertise our latch that backends can use to wake us up while we're + * sleeping. + */ + ProcGlobal->checkpointerLatch = &MyProc->procLatch; + /* * Loop forever */ @@ -368,6 +376,10 @@ CheckpointerMain(void) int flags = 0; pg_time_t now; int elapsed_secs; + int cur_timeout; + + /* Clear any already-pending wakeups */ + ResetLatch(&MyProc->procLatch); /* * Emergency bailout if postmaster has died. This is to avoid the @@ -387,15 +399,15 @@ CheckpointerMain(void) ProcessConfigFile(PGC_SIGHUP); /* - * Checkpointer is the last process to shutdown, so we ask + * Checkpointer is the last process to shut down, so we ask * it to hold the keys for a range of other tasks required * most of which have nothing to do with checkpointing at all. * - * For various reasons, some config values can change - * dynamically so are the primary copy of them is held in - * shared memory to make sure all backends see the same value. - * We make Checkpointer responsible for updating the shared - * memory copy if the parameter setting changes because of SIGHUP. + * For various reasons, some config values can change dynamically + * so the primary copy of them is held in shared memory to make + * sure all backends see the same value. We make Checkpointer + * responsible for updating the shared memory copy if the + * parameter setting changes because of SIGHUP. */ UpdateSharedMemoryConfig(); } @@ -488,7 +500,7 @@ CheckpointerMain(void) errhint("Consider increasing the configuration parameter \"checkpoint_segments\"."))); /* - * Initialize checkpointer-private variables used during checkpoint. + * Initialize checkpointer-private variables used during checkpoint */ ckpt_active = true; if (!do_restartpoint) @@ -543,20 +555,34 @@ CheckpointerMain(void) ckpt_active = false; } + /* Check for archive_timeout and switch xlog files if necessary. */ + CheckArchiveTimeout(); + /* * Send off activity statistics to the stats collector */ pgstat_send_bgwriter(); /* - * Nap for a while and then loop again. Later patches will replace - * this with a latch loop. Keep it simple now for clarity. - * Relatively long sleep because the bgwriter does cleanup now. + * Sleep until we are signaled or it's time for another checkpoint + * or xlog file switch. */ - pg_usleep(500000L); + now = (pg_time_t) time(NULL); + elapsed_secs = now - last_checkpoint_time; + if (elapsed_secs >= CheckPointTimeout) + continue; /* no sleep for us ... */ + cur_timeout = CheckPointTimeout - elapsed_secs; + if (XLogArchiveTimeout > 0 && !RecoveryInProgress()) + { + elapsed_secs = now - last_xlog_switch_time; + if (elapsed_secs >= XLogArchiveTimeout) + continue; /* no sleep for us ... */ + cur_timeout = Min(cur_timeout, XLogArchiveTimeout - elapsed_secs); + } - /* Check for archive_timeout and switch xlog files if necessary. */ - CheckArchiveTimeout(); + (void) WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + cur_timeout * 1000L /* convert to ms */); } } @@ -814,21 +840,50 @@ chkpt_quickdie(SIGNAL_ARGS) static void ChkptSigHupHandler(SIGNAL_ARGS) { + int save_errno = errno; + got_SIGHUP = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; } /* SIGINT: set flag to run a normal checkpoint right away */ static void ReqCheckpointHandler(SIGNAL_ARGS) { + int save_errno = errno; + checkpoint_requested = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; +} + +/* SIGUSR1: used for latch wakeups */ +static void +chkpt_sigusr1_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + + latch_sigusr1_handler(); + + errno = save_errno; } /* SIGUSR2: set flag to run a shutdown checkpoint and exit */ static void ReqShutdownHandler(SIGNAL_ARGS) { + int save_errno = errno; + shutdown_requested = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; } @@ -1055,6 +1110,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno) { BgWriterRequest *request; + bool too_full; if (!IsUnderPostmaster) return false; /* probably shouldn't even get here */ @@ -1068,14 +1124,13 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BgWriterShmem->num_backend_writes++; /* - * If the background writer isn't running or the request queue is full, + * If the checkpointer isn't running or the request queue is full, * the backend will have to perform its own fsync request. But before - * forcing that to happen, we can try to compact the background writer - * request queue. + * forcing that to happen, we can try to compact the request queue. */ if (BgWriterShmem->checkpointer_pid == 0 || - (BgWriterShmem->num_requests >= BgWriterShmem->max_requests - && !CompactCheckpointerRequestQueue())) + (BgWriterShmem->num_requests >= BgWriterShmem->max_requests && + !CompactCheckpointerRequestQueue())) { /* * Count the subset of writes where backends have to do their own @@ -1085,11 +1140,23 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, LWLockRelease(BgWriterCommLock); return false; } + + /* OK, insert request */ request = &BgWriterShmem->requests[BgWriterShmem->num_requests++]; request->rnode = rnode; request->forknum = forknum; request->segno = segno; + + /* If queue is more than half full, nudge the checkpointer to empty it */ + too_full = (BgWriterShmem->num_requests >= + BgWriterShmem->max_requests / 2); + LWLockRelease(BgWriterCommLock); + + /* ... but not till after we release the lock */ + if (too_full && ProcGlobal->checkpointerLatch) + SetLatch(ProcGlobal->checkpointerLatch); + return true; } @@ -1109,7 +1176,7 @@ ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, * practice: there's one queue entry per shared buffer. */ static bool -CompactCheckpointerRequestQueue() +CompactCheckpointerRequestQueue(void) { struct BgWriterSlotMapping { @@ -1230,7 +1297,7 @@ AbsorbFsyncRequests(void) */ LWLockAcquire(BgWriterCommLock, LW_EXCLUSIVE); - /* Transfer write count into pending pgstats message */ + /* Transfer stats counts into pending pgstats message */ BgWriterStats.m_buf_written_backend += BgWriterShmem->num_backend_writes; BgWriterStats.m_buf_fsync_backend += BgWriterShmem->num_backend_fsync; diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 08ef946ee6..cd41dbbc8c 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -54,6 +54,7 @@ #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" +#include "storage/proc.h" #include "storage/smgr.h" #include "utils/guc.h" #include "utils/hsearch.h" @@ -66,6 +67,14 @@ */ int WalWriterDelay = 200; +/* + * Number of do-nothing loops before lengthening the delay time, and the + * multiplier to apply to WalWriterDelay when we do decide to hibernate. + * (Perhaps these need to be configurable?) + */ +#define LOOPS_UNTIL_HIBERNATE 50 +#define HIBERNATE_FACTOR 25 + /* * Flags set by interrupt handlers for later service in the main loop. */ @@ -76,6 +85,7 @@ static volatile sig_atomic_t shutdown_requested = false; static void wal_quickdie(SIGNAL_ARGS); static void WalSigHupHandler(SIGNAL_ARGS); static void WalShutdownHandler(SIGNAL_ARGS); +static void walwriter_sigusr1_handler(SIGNAL_ARGS); /* * Main entry point for walwriter process @@ -88,8 +98,7 @@ WalWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext walwriter_context; - - InitLatch(WALWriterLatch()); /* initialize latch used in main loop */ + int left_till_hibernate; /* * If possible, make this process a group leader, so that the postmaster @@ -114,7 +123,7 @@ WalWriterMain(void) pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); pqsignal(SIGPIPE, SIG_IGN); - pqsignal(SIGUSR1, SIG_IGN); /* reserve for ProcSignal */ + pqsignal(SIGUSR1, walwriter_sigusr1_handler); pqsignal(SIGUSR2, SIG_IGN); /* not used */ /* @@ -217,12 +226,26 @@ WalWriterMain(void) */ PG_SETMASK(&UnBlockSig); + /* + * Reset hibernation state after any error. + */ + left_till_hibernate = LOOPS_UNTIL_HIBERNATE; + + /* + * Advertise our latch that backends can use to wake us up while we're + * sleeping. + */ + ProcGlobal->walwriterLatch = &MyProc->procLatch; + /* * Loop forever */ for (;;) { - ResetLatch(WALWriterLatch()); + long cur_timeout; + + /* Clear any already-pending wakeups */ + ResetLatch(&MyProc->procLatch); /* * Emergency bailout if postmaster has died. This is to avoid the @@ -246,13 +269,27 @@ WalWriterMain(void) } /* - * Do what we're here for... + * Do what we're here for; then, if XLogBackgroundFlush() found useful + * work to do, reset hibernation counter. */ - XLogBackgroundFlush(); + if (XLogBackgroundFlush()) + left_till_hibernate = LOOPS_UNTIL_HIBERNATE; + else if (left_till_hibernate > 0) + left_till_hibernate--; - (void) WaitLatch(WALWriterLatch(), - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - WalWriterDelay /* ms */); + /* + * Sleep until we are signaled or WalWriterDelay has elapsed. If we + * haven't done anything useful for quite some time, lengthen the + * sleep time so as to reduce the server's idle power consumption. + */ + if (left_till_hibernate > 0) + cur_timeout = WalWriterDelay; /* in ms */ + else + cur_timeout = WalWriterDelay * HIBERNATE_FACTOR; + + (void) WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + cur_timeout); } } @@ -298,14 +335,35 @@ wal_quickdie(SIGNAL_ARGS) static void WalSigHupHandler(SIGNAL_ARGS) { + int save_errno = errno; + got_SIGHUP = true; - SetLatch(WALWriterLatch()); + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; } /* SIGTERM: set flag to exit normally */ static void WalShutdownHandler(SIGNAL_ARGS) { + int save_errno = errno; + shutdown_requested = true; - SetLatch(WALWriterLatch()); + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; +} + +/* SIGUSR1: used for latch wakeups */ +static void +walwriter_sigusr1_handler(SIGNAL_ARGS) +{ + int save_errno = errno; + + latch_sigusr1_handler(); + + errno = save_errno; } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 20ed5de75e..8e309f8a0b 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -187,6 +187,8 @@ InitProcGlobal(void) ProcGlobal->startupProcPid = 0; ProcGlobal->startupBufferPinWaitBufId = -1; ProcGlobal->bgwriterLatch = NULL; + ProcGlobal->walwriterLatch = NULL; + ProcGlobal->checkpointerLatch = NULL; /* * Create and initialize all the PGPROC structures we'll need (except for diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f8aecef665..129712e7b9 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -16,7 +16,6 @@ #include "datatype/timestamp.h" #include "lib/stringinfo.h" #include "storage/buf.h" -#include "storage/latch.h" #include "utils/pg_crc.h" /* @@ -266,7 +265,7 @@ extern CheckpointStatsData CheckpointStats; extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata); extern void XLogFlush(XLogRecPtr RecPtr); -extern void XLogBackgroundFlush(void); +extern bool XLogBackgroundFlush(void); extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(uint32 log, uint32 seg, bool *use_existent, bool use_lock); @@ -317,7 +316,6 @@ extern TimeLineID GetRecoveryTargetTLI(void); extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); -extern Latch *WALWriterLatch(void); /* * Starting/stopping a base backup diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h index f97fedfdf6..6a7df38d1a 100644 --- a/src/include/storage/latch.h +++ b/src/include/storage/latch.h @@ -64,6 +64,15 @@ * will be lifted in future by inserting suitable memory barriers into * SetLatch and ResetLatch. * + * Note that use of the process latch (PGPROC.procLatch) is generally better + * than an ad-hoc shared latch for signaling auxiliary processes. This is + * because generic signal handlers will call SetLatch on the process latch + * only, so using any latch other than the process latch effectively precludes + * ever registering a generic handler. Since signals have the potential to + * invalidate the latch timeout on some platforms, resulting in a + * denial-of-service, it is important to verify that all signal handlers + * within all WaitLatch-calling processes call SetLatch. + * * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 987bc08260..c4808f44a0 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -188,8 +188,12 @@ typedef struct PROC_HDR PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC *autovacFreeProcs; - /* BGWriter process latch */ + /* BGWriter process's latch */ Latch *bgwriterLatch; + /* WALWriter process's latch */ + Latch *walwriterLatch; + /* Checkpointer process's latch */ + Latch *checkpointerLatch; /* Current shared estimate of appropriate spins_per_delay value */ int spins_per_delay; /* The proc of the Startup process, since not in ProcArray */