Create a new dedicated Postgres process, "wal writer", which exists to write
and fsync WAL at convenient intervals.  For the moment it just tries to
offload this work from backends, but soon it will be responsible for
guaranteeing a maximum delay before asynchronously-committed transactions
will be flushed to disk.

This is a portion of Simon Riggs' async-commit patch, committed to CVS
separately because a background WAL writer seems like it might be a good idea
independently of the async-commit feature.  I rebased walwriter.c on
bgwriter.c because it seemed like a more appropriate way of handling signals;
while the startup/shutdown logic in postmaster.c is more like autovac because
we want walwriter to quit before we start the shutdown checkpoint.
This commit is contained in:
Tom Lane 2007-07-24 04:54:09 +00:00
parent 53d2951be7
commit ad4295728e
11 changed files with 547 additions and 70 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.132 2007/07/24 01:53:55 alvherre Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/config.sgml,v 1.133 2007/07/24 04:54:08 tgl Exp $ -->
<chapter Id="runtime-config">
<title>Server Configuration</title>
@ -1413,7 +1413,7 @@ SET ENABLE_SEQSCAN TO OFF;
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
<term><varname>wal_buffers</varname> (<type>integer</type>)</term>
<indexterm>
@ -1438,7 +1438,27 @@ SET ENABLE_SEQSCAN TO OFF;
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-wal-writer-delay" xreflabel="wal_writer_delay">
<term><varname>wal_writer_delay</varname> (<type>integer</type>)</term>
<indexterm>
<primary><varname>wal_writer_delay</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Specifies the delay between activity rounds for the WAL writer.
In each round the writer will flush WAL to disk. It then sleeps for
<varname>wal_writer_delay</> milliseconds, and repeats. The default
value is 200 milliseconds (<literal>200ms</>). Note that on many
systems, the effective resolution of sleep delays is 10 milliseconds;
setting <varname>wal_writer_delay</> to a value that is not a multiple
of 10 might have the same results as setting it to the next higher
multiple of 10. This parameter can only be set in the
<filename>postgresql.conf</> file or on the server command line.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-commit-delay" xreflabel="commit_delay">
<term><varname>commit_delay</varname> (<type>integer</type>)</term>
<indexterm>
@ -1521,7 +1541,7 @@ SET ENABLE_SEQSCAN TO OFF;
</indexterm>
<listitem>
<para>
Specifies the target length of checkpoints, as a fraction of
Specifies the target length of checkpoints, as a fraction of
the checkpoint interval. The default is 0.5.
This parameter can only be set in the <filename>postgresql.conf</>

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.274 2007/06/30 19:12:01 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.275 2007/07/24 04:54:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -484,7 +484,6 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
uint32 len,
write_len;
unsigned i;
XLogwrtRqst LogwrtRqst;
bool updrqst;
bool doPageWrites;
bool isLogSwitch = (rmid == RM_XLOG_ID && info == XLOG_SWITCH);
@ -643,43 +642,6 @@ begin:;
START_CRIT_SECTION();
/* update LogwrtResult before doing cache fill check */
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
LogwrtRqst = xlogctl->LogwrtRqst;
LogwrtResult = xlogctl->LogwrtResult;
SpinLockRelease(&xlogctl->info_lck);
}
/*
* If cache is half filled then try to acquire write lock and do
* XLogWrite. Ignore any fractional blocks in performing this check.
*/
LogwrtRqst.Write.xrecoff -= LogwrtRqst.Write.xrecoff % XLOG_BLCKSZ;
if (LogwrtRqst.Write.xlogid != LogwrtResult.Write.xlogid ||
(LogwrtRqst.Write.xrecoff >= LogwrtResult.Write.xrecoff +
XLogCtl->XLogCacheByte / 2))
{
if (LWLockConditionalAcquire(WALWriteLock, LW_EXCLUSIVE))
{
/*
* Since the amount of data we write here is completely optional
* anyway, tell XLogWrite it can be "flexible" and stop at a
* convenient boundary. This allows writes triggered by this
* mechanism to synchronize with the cache boundaries, so that in
* a long transaction we'll basically dump alternating halves of
* the buffer array.
*/
LogwrtResult = XLogCtl->Write.LogwrtResult;
if (XLByteLT(LogwrtResult.Write, LogwrtRqst.Write))
XLogWrite(LogwrtRqst, true, false);
LWLockRelease(WALWriteLock);
}
}
/* Now wait to get insert lock */
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
@ -1800,6 +1762,85 @@ XLogFlush(XLogRecPtr record)
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
}
/*
* Flush xlog, but without specifying exactly where to flush to.
*
* We normally flush only completed blocks; but if there is nothing to do on
* that basis, we check for unflushed async commits in the current incomplete
* block, and flush through the latest one of those. Thus, if async commits
* are not being used, we will flush complete blocks only. We can guarantee
* that async commits reach disk after at most three cycles; normally only
* one or two. (We allow XLogWrite to write "flexibly", meaning it can stop
* at the end of the buffer ring; this makes a difference only with very high
* load or long wal_writer_delay, but imposes one extra cycle for the worst
* case for async commits.)
*
* This routine is invoked periodically by the background walwriter process.
*/
void
XLogBackgroundFlush(void)
{
XLogRecPtr WriteRqstPtr;
bool flexible = true;
/* read LogwrtResult and update local state */
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
LogwrtResult = xlogctl->LogwrtResult;
WriteRqstPtr = xlogctl->LogwrtRqst.Write;
SpinLockRelease(&xlogctl->info_lck);
}
/* back off to last completed page boundary */
WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
#ifdef NOT_YET /* async commit patch is still to come */
/* if we have already flushed that far, consider async commit records */
if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->async_commit_lck);
WriteRqstPtr = xlogctl->asyncCommitLSN;
SpinLockRelease(&xlogctl->async_commit_lck);
flexible = false; /* ensure it all gets written */
}
#endif
/* Done if already known flushed */
if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
return;
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
elog(LOG, "xlog bg flush request %X/%X; write %X/%X; flush %X/%X",
WriteRqstPtr.xlogid, WriteRqstPtr.xrecoff,
LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
#endif
START_CRIT_SECTION();
/* now wait for the write lock */
LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
LogwrtResult = XLogCtl->Write.LogwrtResult;
if (!XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
{
XLogwrtRqst WriteRqst;
WriteRqst.Write = WriteRqstPtr;
WriteRqst.Flush = WriteRqstPtr;
XLogWrite(WriteRqst, flexible, false);
}
LWLockRelease(WALWriteLock);
END_CRIT_SECTION();
}
/*
* Test whether XLOG data has been flushed up to (at least) the given position.
*

View File

@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.234 2007/06/28 00:02:37 tgl Exp $
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.235 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -30,6 +30,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "postmaster/bgwriter.h"
#include "postmaster/walwriter.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/proc.h"
@ -195,7 +196,7 @@ static IndexList *ILHead = NULL;
* AuxiliaryProcessMain
*
* The main entry point for auxiliary processes, such as the bgwriter,
* bootstrapper and the shared memory checker code.
* walwriter, bootstrapper and the shared memory checker code.
*
* This code is here just because of historical reasons.
*/
@ -331,6 +332,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
case BgWriterProcess:
statmsg = "writer process";
break;
case WalWriterProcess:
statmsg = "wal writer process";
break;
default:
statmsg = "??? process";
break;
@ -419,6 +423,12 @@ AuxiliaryProcessMain(int argc, char *argv[])
InitXLOGAccess();
BackgroundWriterMain();
proc_exit(1); /* should never return */
case WalWriterProcess:
/* don't set signals, walwriter has its own agenda */
InitXLOGAccess();
WalWriterMain();
proc_exit(1); /* should never return */
default:
elog(PANIC, "unrecognized process type: %d", auxType);

View File

@ -4,7 +4,7 @@
# Makefile for src/backend/postmaster
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.22 2007/01/20 17:16:12 petere Exp $
# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.23 2007/07/24 04:54:09 tgl Exp $
#
#-------------------------------------------------------------------------
@ -12,8 +12,8 @@ subdir = src/backend/postmaster
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = bgwriter.o autovacuum.o pgarch.o pgstat.o postmaster.o syslogger.o \
fork_process.o
OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
syslogger.o walwriter.o
all: SUBSYS.o

View File

@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.534 2007/07/23 10:16:54 mha Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.535 2007/07/24 04:54:09 tgl Exp $
*
* NOTES
*
@ -136,7 +136,7 @@ typedef struct bkend
{
pid_t pid; /* process id of backend */
long cancel_key; /* cancel key for cancels for this backend */
bool is_autovacuum; /* is it an autovacuum process */
bool is_autovacuum; /* is it an autovacuum process? */
} Backend;
static Dllist *BackendList;
@ -144,9 +144,9 @@ static Dllist *BackendList;
#ifdef EXEC_BACKEND
/*
* Number of entries in the backend table. Twice the number of backends,
* plus four other subprocesses (stats, bgwriter, autovac, logger).
* plus five other subprocesses (stats, bgwriter, walwriter, autovac, logger).
*/
#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 4)
#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 5)
static Backend *ShmemBackendArray;
#endif
@ -201,6 +201,7 @@ char *bonjour_name;
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
BgWriterPID = 0,
WalWriterPID = 0,
AutoVacPID = 0,
PgArchPID = 0,
PgStatPID = 0,
@ -221,7 +222,7 @@ bool ClientAuthInProgress = false; /* T during new-client
bool redirection_done = false;
/* received START_AUTOVAC_LAUNCHER signal */
static bool start_autovac_launcher = false;
static volatile sig_atomic_t start_autovac_launcher = false;
/*
* State for assigning random salts and cancel keys.
@ -365,6 +366,7 @@ static void ShmemBackendArrayRemove(pid_t pid);
#define StartupDataBase() StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess)
/* Macros to check exit status of a child process */
#define EXIT_STATUS_0(st) ((st) == 0)
@ -909,8 +911,9 @@ PostmasterMain(int argc, char *argv[])
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/autovacuum.c,
* postmaster/pgarch.c, postmaster/pgstat.c, and postmaster/syslogger.c.
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and
* postmaster/syslogger.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
@ -1244,6 +1247,15 @@ ServerLoop(void)
signal_child(BgWriterPID, SIGUSR2);
}
/*
* Likewise, if we have lost the walwriter process, try to start a
* new one. We don't need walwriter to complete a shutdown, so
* don't start it if shutdown already initiated.
*/
if (WalWriterPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
WalWriterPID = StartWalWriter();
/* If we have lost the autovacuum launcher, try to start a new one */
if (AutoVacPID == 0 &&
(AutoVacuumingActive() || start_autovac_launcher) &&
@ -1251,7 +1263,7 @@ ServerLoop(void)
{
AutoVacPID = StartAutoVacLauncher();
if (AutoVacPID != 0)
start_autovac_launcher = false; /* signal successfully processed */
start_autovac_launcher = false; /* signal processed */
}
/* If we have lost the archiver, try to start a new one */
@ -1842,6 +1854,8 @@ SIGHUP_handler(SIGNAL_ARGS)
SignalChildren(SIGHUP);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGHUP);
if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGHUP);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGHUP);
if (PgArchPID != 0)
@ -1901,8 +1915,11 @@ pmdie(SIGNAL_ARGS)
/* and the autovac launcher too */
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
/* and the walwriter too */
if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM);
if (DLGetHead(BackendList) || AutoVacPID != 0)
if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
break; /* let reaper() handle this */
/*
@ -1938,7 +1955,7 @@ pmdie(SIGNAL_ARGS)
ereport(LOG,
(errmsg("received fast shutdown request")));
if (DLGetHead(BackendList) || AutoVacPID != 0)
if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0)
{
if (!FatalError)
{
@ -1947,6 +1964,8 @@ pmdie(SIGNAL_ARGS)
SignalChildren(SIGTERM);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM);
if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM);
/* reaper() does the rest */
}
break;
@ -1957,6 +1976,7 @@ pmdie(SIGNAL_ARGS)
*
* Note: if we previously got SIGTERM then we may send SIGUSR2 to
* the bgwriter a second time here. This should be harmless.
* Ditto for the signals to the other special children.
*/
if (StartupPID != 0)
{
@ -1993,6 +2013,8 @@ pmdie(SIGNAL_ARGS)
signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGQUIT);
if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGQUIT);
if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGQUIT);
if (PgArchPID != 0)
@ -2091,13 +2113,14 @@ reaper(SIGNAL_ARGS)
/*
* Go to shutdown mode if a shutdown request was pending.
* Otherwise, try to start the archiver, stats collector and
* autovacuum launcher.
* Otherwise, try to start the other special children.
*/
if (Shutdown > NoShutdown && BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
else if (Shutdown == NoShutdown)
{
if (WalWriterPID == 0)
WalWriterPID = StartWalWriter();
if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start();
if (PgStatPID == 0)
@ -2121,7 +2144,8 @@ reaper(SIGNAL_ARGS)
BgWriterPID = 0;
if (EXIT_STATUS_0(exitstatus) &&
Shutdown > NoShutdown && !FatalError &&
!DLGetHead(BackendList) && AutoVacPID == 0)
!DLGetHead(BackendList) &&
WalWriterPID == 0 && AutoVacPID == 0)
{
/*
* Normal postmaster exit is here: we've seen normal exit of
@ -2150,7 +2174,8 @@ reaper(SIGNAL_ARGS)
* required will happen on next postmaster start.
*/
if (Shutdown > NoShutdown &&
!DLGetHead(BackendList) && AutoVacPID == 0)
!DLGetHead(BackendList) &&
WalWriterPID == 0 && AutoVacPID == 0)
{
ereport(LOG,
(errmsg("abnormal database system shutdown")));
@ -2161,6 +2186,20 @@ reaper(SIGNAL_ARGS)
continue;
}
/*
* Was it the wal writer? Normal exit can be ignored; we'll
* start a new one at the next iteration of the postmaster's main loop,
* if necessary. Any other exit condition is treated as a crash.
*/
if (WalWriterPID != 0 && pid == WalWriterPID)
{
WalWriterPID = 0;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("wal writer process"));
continue;
}
/*
* Was it the autovacuum launcher? Normal exit can be ignored; we'll
* start a new one at the next iteration of the postmaster's main loop,
@ -2233,7 +2272,8 @@ reaper(SIGNAL_ARGS)
* StartupDataBase. (We can ignore the archiver and stats processes
* here since they are not connected to shmem.)
*/
if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0 ||
if (DLGetHead(BackendList) || StartupPID != 0 ||
BgWriterPID != 0 || WalWriterPID != 0 ||
AutoVacPID != 0)
goto reaper_done;
ereport(LOG,
@ -2249,7 +2289,8 @@ reaper(SIGNAL_ARGS)
if (Shutdown > NoShutdown)
{
if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0)
if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0 ||
WalWriterPID != 0)
goto reaper_done;
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
@ -2315,7 +2356,8 @@ CleanupBackend(int pid,
}
/*
* HandleChildCrash -- cleanup after failed backend, bgwriter, or autovacuum.
* HandleChildCrash -- cleanup after failed backend, bgwriter, walwriter,
* or autovacuum.
*
* The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie.
@ -2390,6 +2432,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Take care of the walwriter too */
if (pid == WalWriterPID)
WalWriterPID = 0;
else if (WalWriterPID != 0 && !FatalError)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) WalWriterPID)));
signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Take care of the autovacuum launcher too */
if (pid == AutoVacPID)
AutoVacPID = 0;
@ -3622,9 +3676,11 @@ sigusr1_handler(SIGNAL_ARGS)
start_autovac_launcher = true;
}
/* The autovacuum launcher wants us to start a worker process. */
if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER))
{
/* The autovacuum launcher wants us to start a worker process. */
StartAutovacuumWorker();
}
PG_SETMASK(&UnBlockSig);
@ -3805,6 +3861,10 @@ StartChildProcess(AuxProcType type)
ereport(LOG,
(errmsg("could not fork background writer process: %m")));
break;
case WalWriterProcess:
ereport(LOG,
(errmsg("could not fork wal writer process: %m")));
break;
default:
ereport(LOG,
(errmsg("could not fork process: %m")));

View File

@ -0,0 +1,311 @@
/*-------------------------------------------------------------------------
*
* walwriter.c
*
* The WAL writer background process is new as of Postgres 8.3. It attempts
* to keep regular backends from having to write out (and fsync) WAL pages.
* Also, it guarantees that transaction commit records that weren't synced
* to disk immediately upon commit (ie, were "asynchronously committed")
* will reach disk within a knowable time --- which, as it happens, is at
* most three times the wal_writer_delay cycle time.
*
* Note that as with the bgwriter for shared buffers, regular backends are
* still empowered to issue WAL writes and fsyncs when the walwriter doesn't
* keep up.
*
* Because the walwriter's cycle is directly linked to the maximum delay
* before async-commit transactions are guaranteed committed, it's probably
* unwise to load additional functionality onto it. For instance, if you've
* got a yen to create xlog segments further in advance, that'd be better done
* in bgwriter than in walwriter.
*
* The walwriter is started by the postmaster as soon as the startup subprocess
* finishes. It remains alive until the postmaster commands it to terminate.
* Normal termination is by SIGTERM, which instructs the walwriter to exit(0).
* Emergency termination is by SIGQUIT; like any backend, the walwriter will
* simply abort and exit on SIGQUIT.
*
* If the walwriter exits unexpectedly, the postmaster treats that the same
* as a backend crash: shared memory may be corrupted, so remaining backends
* should be killed by SIGQUIT and then a recovery cycle started.
*
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/walwriter.c,v 1.1 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <signal.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include "access/xlog.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "postmaster/walwriter.h"
#include "storage/bufmgr.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/smgr.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
/*
* GUC parameters
*/
/* wal_writer_delay: sleep time between walwriter rounds, in milliseconds */
int WalWriterDelay = 200;
/*
* Flags set by interrupt handlers for later service in the main loop.
*/
/* set by WalSigHupHandler; the main loop then re-reads the config file */
static volatile sig_atomic_t got_SIGHUP = false;
/* set by WalShutdownHandler; the main loop then exits via proc_exit(0) */
static volatile sig_atomic_t shutdown_requested = false;
/* Signal handlers */
static void wal_quickdie(SIGNAL_ARGS);
static void WalSigHupHandler(SIGNAL_ARGS);
static void WalShutdownHandler(SIGNAL_ARGS);
/*
* Main entry point for walwriter process
*
* This is invoked from AuxiliaryProcessMain, which has already created the
* basic execution environment, but not enabled signals yet.
*
* The function installs its signal handlers, sets up a resource owner, a
* private memory context and an error-recovery point, and then loops forever:
* service pending signal flags, call XLogBackgroundFlush(), sleep for
* WalWriterDelay milliseconds.  It leaves only by exiting the process.
*/
void
WalWriterMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext walwriter_context;
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (walwriter probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*
* We have no particular use for SIGINT at the moment, but seems
* reasonable to treat like SIGTERM.
*/
pqsignal(SIGHUP, WalSigHupHandler); /* set flag to read config file */
pqsignal(SIGINT, WalShutdownHandler); /* request shutdown */
pqsignal(SIGTERM, WalShutdownHandler); /* request shutdown */
pqsignal(SIGQUIT, wal_quickdie); /* hard crash time */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
pqsignal(SIGUSR2, SIG_IGN); /* not used */
/*
* Reset some signals that are accepted by postmaster but not here
*/
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* We allow SIGQUIT (quickdie) at all times */
#ifdef HAVE_SIGPROCMASK
sigdelset(&BlockSig, SIGQUIT);
#else
BlockSig &= ~(sigmask(SIGQUIT));
#endif
/*
* Create a resource owner to keep track of our resources (not clear
* that we need this, but may as well have one).
*/
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Writer");
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks. Formerly this code just ran in
* TopMemoryContext, but resetting that would be a really bad idea.
*/
walwriter_context = AllocSetContextCreate(TopMemoryContext,
"Wal Writer",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(walwriter_context);
/*
* If an exception is encountered, processing resumes here.
*
* This code is heavily based on bgwriter.c, q.v.
*
* The cleanup block below is skipped on the initial pass (sigsetjmp
* returns 0) and runs only when control comes back here via the jump
* buffer.  Either way execution then falls through into the main loop.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/*
* These operations are really just a minimal subset of
* AbortTransaction(). We don't have very many resources to worry
* about in walwriter, but we do have LWLocks, and perhaps buffers?
*/
LWLockReleaseAll();
AbortBufferIO();
UnlockBuffers();
/* buffer pins are released here: */
ResourceOwnerRelease(CurrentResourceOwner,
RESOURCE_RELEASE_BEFORE_LOCKS,
false, true);
/* we needn't bother with the other ResourceOwnerRelease phases */
AtEOXact_Buffers(false);
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
*/
MemoryContextSwitchTo(walwriter_context);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(walwriter_context);
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/*
* Sleep at least 1 second after any error. A write error is likely
* to be repeated, and we don't want to be filling the error logs as
* fast as we can.
*/
pg_usleep(1000000L);
/*
* Close all open files after any error. This is helpful on Windows,
* where holding deleted files open causes various strange errors.
* It's not clear we need it elsewhere, but shouldn't hurt.
*/
smgrcloseall();
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
/*
* Unblock signals (they were blocked when the postmaster forked us)
*/
PG_SETMASK(&UnBlockSig);
/*
* Loop forever
*/
for (;;)
{
long udelay;
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
*/
if (!PostmasterIsAlive(true))
exit(1);
/*
* Process any requests or signals received recently.
*/
if (got_SIGHUP)
{
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
if (shutdown_requested)
{
/* Normal exit from the walwriter is here */
proc_exit(0); /* done */
}
/*
* Do what we're here for...
*/
XLogBackgroundFlush();
/*
* Delay until time to do something more, but fall out of delay
* reasonably quickly if signaled.
*
* WalWriterDelay is in milliseconds; pg_usleep takes microseconds.
* Delays of a second or more are slept in one-second slices with the
* signal flags rechecked before each slice; the remainder (less than
* one second) is slept last, and skipped if a flag is already set.
*/
udelay = WalWriterDelay * 1000L;
while (udelay > 999999L)
{
if (got_SIGHUP || shutdown_requested)
break;
pg_usleep(1000000L);
udelay -= 1000000L;
}
if (!(got_SIGHUP || shutdown_requested))
pg_usleep(udelay);
}
}
/* --------------------------------
* signal handler routines
* --------------------------------
*/
/*
 * wal_quickdie() occurs when signalled SIGQUIT by the postmaster.
 *
 * Some backend has bought the farm, so we need to stop what we're doing
 * and exit at once, without any cleanup.
 */
static void
wal_quickdie(SIGNAL_ARGS)
{
	/* Switch to the BlockSig mask while we get out */
	PG_SETMASK(&BlockSig);

	/*
	 * DO NOT proc_exit() -- we're here because shared memory may be
	 * corrupted, so we don't want to try to clean up our transaction. Just
	 * nail the windows shut and get out of town.
	 *
	 * For the same reason, and because we are running inside a signal
	 * handler that may have interrupted arbitrary code, leave via _exit()
	 * rather than exit(): exit() is not async-signal-safe, and it would run
	 * atexit handlers and flush stdio buffers, which is exactly the kind of
	 * cleanup we must avoid here.
	 *
	 * Note we do _exit(2) not _exit(0). This is to force the postmaster into
	 * a system reset cycle if some idiot DBA sends a manual SIGQUIT to a
	 * random backend. This is necessary precisely because we don't clean up
	 * our shared memory state.
	 */
	_exit(2);
}
/*
 * WalSigHupHandler -- SIGHUP handler.
 *
 * Does nothing but record that the signal arrived; the main loop notices
 * the flag at its next iteration and re-reads the configuration file.
 */
static void
WalSigHupHandler(SIGNAL_ARGS)
{
	got_SIGHUP = true;
}
/*
 * WalShutdownHandler -- handler for SIGTERM (and SIGINT, which is treated
 * the same way).
 *
 * Does nothing but record the request; the main loop notices the flag at
 * its next iteration and exits normally.
 */
static void
WalShutdownHandler(SIGNAL_ARGS)
{
	shutdown_requested = true;
}

View File

@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.406 2007/07/24 01:53:56 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.407 2007/07/24 04:54:09 tgl Exp $
*
*--------------------------------------------------------------------
*/
@ -54,6 +54,7 @@
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "storage/fd.h"
#include "storage/freespace.h"
#include "tcop/tcopprot.h"
@ -1509,6 +1510,16 @@ static struct config_int ConfigureNamesInt[] =
8, 4, INT_MAX, NULL, NULL
},
{
{"wal_writer_delay", PGC_SIGHUP, WAL_SETTINGS,
gettext_noop("WAL writer sleep time between WAL flushes."),
NULL,
GUC_UNIT_MS
},
&WalWriterDelay,
200, 1, 10000, NULL, NULL
},
{
{"commit_delay", PGC_USERSET, WAL_CHECKPOINTS,
gettext_noop("Sets the delay in microseconds between transaction commit and "

View File

@ -159,6 +159,8 @@
#full_page_writes = on # recover from partial page writes
#wal_buffers = 64kB # min 32kB
# (change requires restart)
#wal_writer_delay = 200ms # range 1-10000, in milliseconds
#commit_delay = 0 # range 0-100000, in microseconds
#commit_siblings = 5 # range 1-1000

View File

@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.80 2007/06/30 19:12:02 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.81 2007/07/24 04:54:09 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
@ -196,6 +196,7 @@ extern CheckpointStatsData CheckpointStats;
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
extern void XLogFlush(XLogRecPtr RecPtr);
extern void XLogBackgroundFlush(void);
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.46 2007/03/07 13:35:03 alvherre Exp $
* $PostgreSQL: pgsql/src/include/bootstrap/bootstrap.h,v 1.47 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -69,7 +69,8 @@ typedef enum
CheckerProcess,
BootstrapProcess,
StartupProcess,
BgWriterProcess
BgWriterProcess,
WalWriterProcess
} AuxProcType;
#endif /* BOOTSTRAP_H */

View File

@ -0,0 +1,20 @@
/*-------------------------------------------------------------------------
*
* walwriter.h
* Exports from postmaster/walwriter.c.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/postmaster/walwriter.h,v 1.1 2007/07/24 04:54:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef _WALWRITER_H
#define _WALWRITER_H
/* GUC options */
/* wal_writer_delay: walwriter sleep time between flush rounds, in milliseconds */
extern int WalWriterDelay;
/* Entry point of the walwriter process; loops forever and is not expected to return */
extern void WalWriterMain(void);
#endif /* _WALWRITER_H */