Make archiver process an auxiliary process.

This commit changes the WAL archiver process so that it's treated as
an auxiliary process and can use shared memory. This is an infrastructure
patch required for the upcoming shared-memory based stats collector patch
series. That patch series basically needs every process that can report
statistics, including the archiver, to have access to shared memory. Since
this patch is useful by itself, both to simplify the code and to let users
monitor the status of the archiver, it's committed separately in advance.

This commit simplifies the code for WAL archiving. For example, previously
backends needed to signal the archiver via the postmaster when they notified
the archiver that there were some WAL files to archive. This commit removes
that signal to the postmaster and instead enables backends to notify the
archiver directly using its shared latch.

Also, as a side effect of this change, information about the archiver
process becomes visible in the pg_stat_activity view.

Author: Kyotaro Horiguchi
Reviewed-by: Andres Freund, Álvaro Herrera, Julien Rouhaud, Tomas Vondra, Arthur Zakirov, Fujii Masao
Discussion: https://postgr.es/m/20180629.173418.190173462.horiguchi.kyotaro@lab.ntt.co.jp
This commit is contained in:
Fujii Masao 2021-03-15 13:13:14 +09:00
parent 0ea71c93a0
commit d75288fb27
11 changed files with 164 additions and 215 deletions

View File

@ -935,6 +935,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<literal>logical replication worker</literal>,
<literal>parallel worker</literal>, <literal>background writer</literal>,
<literal>client backend</literal>, <literal>checkpointer</literal>,
<literal>archiver</literal>,
<literal>startup</literal>, <literal>walreceiver</literal>,
<literal>walsender</literal> and <literal>walwriter</literal>.
In addition, background workers registered by extensions may have

View File

@ -25,11 +25,11 @@
#include "common/archive.h"
#include "miscadmin.h"
#include "postmaster/startup.h"
#include "postmaster/pgarch.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
/*
* Attempt to retrieve the specified file from off-line archival storage.
@ -491,7 +491,7 @@ XLogArchiveNotify(const char *xlog)
/* Notify archiver that it's got something to do */
if (IsUnderPostmaster)
SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
PgArchWakeup();
}
/*

View File

@ -317,6 +317,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
case StartupProcess:
MyBackendType = B_STARTUP;
break;
case ArchiverProcess:
MyBackendType = B_ARCHIVER;
break;
case BgWriterProcess:
MyBackendType = B_BG_WRITER;
break;
@ -437,30 +440,29 @@ AuxiliaryProcessMain(int argc, char *argv[])
proc_exit(1); /* should never return */
case StartupProcess:
/* don't set signals, startup process has its own agenda */
StartupProcessMain();
proc_exit(1); /* should never return */
proc_exit(1);
case ArchiverProcess:
PgArchiverMain();
proc_exit(1);
case BgWriterProcess:
/* don't set signals, bgwriter has its own agenda */
BackgroundWriterMain();
proc_exit(1); /* should never return */
proc_exit(1);
case CheckpointerProcess:
/* don't set signals, checkpointer has its own agenda */
CheckpointerMain();
proc_exit(1); /* should never return */
proc_exit(1);
case WalWriterProcess:
/* don't set signals, walwriter has its own agenda */
InitXLOGAccess();
WalWriterMain();
proc_exit(1); /* should never return */
proc_exit(1);
case WalReceiverProcess:
/* don't set signals, walreceiver has its own agenda */
WalReceiverMain();
proc_exit(1); /* should never return */
proc_exit(1);
default:
elog(PANIC, "unrecognized process type: %d", (int) MyAuxProcType);

View File

@ -38,16 +38,13 @@
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "postmaster/fork_process.h"
#include "postmaster/interrupt.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#include "storage/dsm.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/procsignal.h"
#include "utils/guc.h"
#include "utils/ps_status.h"
@ -73,153 +70,99 @@
*/
#define NUM_ORPHAN_CLEANUP_RETRIES 3
/*
 * Shared memory area for archiver process.
 *
 * PgArchiverMain() stores MyProc->pgprocno here once the archiver is up,
 * pgarch_die() puts it back to INVALID_PGPROCNO at archiver exit, and
 * PgArchWakeup() reads it to find the latch it should set.
 */
typedef struct PgArchData
{
int pgprocno; /* pgprocno of archiver process, or INVALID_PGPROCNO */
} PgArchData;
/* ----------
* Local data
* ----------
*/
static time_t last_pgarch_start_time;
static time_t last_sigterm_time = 0;
static PgArchData *PgArch = NULL;
/*
* Flags set by interrupt handlers for later service in the main loop.
*/
static volatile sig_atomic_t wakened = false;
static volatile sig_atomic_t ready_to_stop = false;
/* ----------
* Local function forward declarations
* ----------
*/
#ifdef EXEC_BACKEND
static pid_t pgarch_forkexec(void);
#endif
NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]) pg_attribute_noreturn();
static void pgarch_waken(SIGNAL_ARGS);
static void pgarch_waken_stop(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog);
static bool pgarch_readyXlog(char *xlog);
static void pgarch_archiveDone(char *xlog);
static void pgarch_die(int code, Datum arg);
/* Report shared memory space needed by PgArchShmemInit */
Size
PgArchShmemSize(void)
{
Size size = 0;
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
*/
size = add_size(size, sizeof(PgArchData));
return size;
}
/*
 * Allocate and initialize archiver-related shared memory.
 *
 * Points the file-local PgArch at the "Archiver Data" struct obtained from
 * ShmemInitStruct(), sized by PgArchShmemSize().  When ShmemInitStruct()
 * reports that the struct was not found, it is zeroed and pgprocno is set
 * to INVALID_PGPROCNO, i.e. no archiver is advertised yet.
 */
void
PgArchShmemInit(void)
{
bool found;
PgArch = (PgArchData *)
ShmemInitStruct("Archiver Data", PgArchShmemSize(), &found);
if (!found)
{
/* First time through, so initialize */
MemSet(PgArch, 0, PgArchShmemSize());
/* INVALID_PGPROCNO makes PgArchWakeup() skip setting any latch */
PgArch->pgprocno = INVALID_PGPROCNO;
}
}
/*
* pgarch_start
* PgArchCanRestart
*
* Called from postmaster at startup or after an existing archiver
* died. Attempt to fire up a fresh archiver process.
* Return true and archiver is allowed to restart if enough time has
* passed since it was launched last to reach PGARCH_RESTART_INTERVAL.
* Otherwise return false.
*
* Returns PID of child process, or 0 if fail.
*
* Note: if fail, we will be called again from the postmaster main loop.
* This is a safety valve to protect against continuous respawn attempts if the
* archiver is dying immediately at launch. Note that since we will retry to
* launch the archiver from the postmaster main loop, we will get another
* chance later.
*/
int
pgarch_start(void)
bool
PgArchCanRestart(void)
{
time_t curtime;
pid_t pgArchPid;
static time_t last_pgarch_start_time = 0;
time_t curtime = time(NULL);
/*
* Do nothing if no archiver needed
* Return false and don't restart archiver if too soon since last archiver
* start.
*/
if (!XLogArchivingActive())
return 0;
/*
* Do nothing if too soon since last archiver start. This is a safety
* valve to protect against continuous respawn attempts if the archiver is
* dying immediately at launch. Note that since we will be re-called from
* the postmaster main loop, we will get another chance later.
*/
curtime = time(NULL);
if ((unsigned int) (curtime - last_pgarch_start_time) <
(unsigned int) PGARCH_RESTART_INTERVAL)
return 0;
return false;
last_pgarch_start_time = curtime;
#ifdef EXEC_BACKEND
switch ((pgArchPid = pgarch_forkexec()))
#else
switch ((pgArchPid = fork_process()))
#endif
{
case -1:
ereport(LOG,
(errmsg("could not fork archiver: %m")));
return 0;
#ifndef EXEC_BACKEND
case 0:
/* in postmaster child ... */
InitPostmasterChild();
/* Close the postmaster's sockets */
ClosePostmasterPorts(false);
/* Drop our connection to postmaster's shared memory, as well */
dsm_detach_all();
PGSharedMemoryDetach();
PgArchiverMain(0, NULL);
break;
#endif
default:
return (int) pgArchPid;
}
/* shouldn't get here */
return 0;
return true;
}
/* ------------------------------------------------------------
* Local functions called by archiver follow
* ------------------------------------------------------------
*/
#ifdef EXEC_BACKEND
/*
* pgarch_forkexec() -
*
* Format up the arglist for, then fork and exec, archive process
*/
static pid_t
pgarch_forkexec(void)
{
char *av[10];
int ac = 0;
av[ac++] = "postgres";
av[ac++] = "--forkarch";
av[ac++] = NULL; /* filled in by postmaster_forkexec */
av[ac] = NULL;
Assert(ac < lengthof(av));
return postmaster_forkexec(ac, av);
}
#endif /* EXEC_BACKEND */
/*
* PgArchiverMain
*
* The argc/argv parameters are valid only in EXEC_BACKEND case. However,
* since we don't use 'em, it hardly matters...
*/
NON_EXEC_STATIC void
PgArchiverMain(int argc, char *argv[])
/* Main entry point for archiver process */
void
PgArchiverMain(void)
{
/*
* Ignore all signals usually bound to some action in the postmaster,
@ -231,33 +174,51 @@ PgArchiverMain(int argc, char *argv[])
/* SIGQUIT handler was already set up by InitPostmasterChild */
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, pgarch_waken);
pqsignal(SIGUSR1, procsignal_sigusr1_handler);
pqsignal(SIGUSR2, pgarch_waken_stop);
/* Reset some signals that are accepted by postmaster but not here */
pqsignal(SIGCHLD, SIG_DFL);
/* Unblock signals (they were blocked when the postmaster forked us) */
PG_SETMASK(&UnBlockSig);
MyBackendType = B_ARCHIVER;
init_ps_display(NULL);
/* We shouldn't be launched unnecessarily. */
Assert(XLogArchivingActive());
/* Arrange to clean up at archiver exit */
on_shmem_exit(pgarch_die, 0);
/*
* Advertise our pgprocno so that backends can use our latch to wake us up
* while we're sleeping.
*/
PgArch->pgprocno = MyProc->pgprocno;
pgarch_MainLoop();
exit(0);
proc_exit(0);
}
/* SIGUSR1 signal handler for archiver process */
static void
pgarch_waken(SIGNAL_ARGS)
/*
* Wake up the archiver
*/
void
PgArchWakeup(void)
{
int save_errno = errno;
int arch_pgprocno = PgArch->pgprocno;
/* set flag that there is work to be done */
wakened = true;
SetLatch(MyLatch);
errno = save_errno;
/*
* We don't acquire ProcArrayLock here. It's actually fine because
* procLatch isn't ever freed, so we just can potentially set the wrong
* process' (or no process') latch. Even in that case the archiver will
* be relaunched shortly and will start archiving.
*/
if (arch_pgprocno != INVALID_PGPROCNO)
SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch);
}
/* SIGUSR2 signal handler for archiver process */
static void
pgarch_waken_stop(SIGNAL_ARGS)
@ -282,14 +243,6 @@ pgarch_MainLoop(void)
pg_time_t last_copy_time = 0;
bool time_to_stop;
/*
* We run the copy loop immediately upon entry, in case there are
* unarchived files left over from a previous database run (or maybe the
* archiver died unexpectedly). After that we wait for a signal or
* timeout before doing more.
*/
wakened = true;
/*
* There shouldn't be anything for the archiver to do except to wait for a
* signal ... however, the archiver exists to protect our data, so she
@ -328,12 +281,8 @@ pgarch_MainLoop(void)
}
/* Do what we're here for */
if (wakened || time_to_stop)
{
wakened = false;
pgarch_ArchiverCopyLoop();
last_copy_time = time(NULL);
}
pgarch_ArchiverCopyLoop();
last_copy_time = time(NULL);
/*
* Sleep until a signal is received, or until a poll is forced by
@ -354,13 +303,9 @@ pgarch_MainLoop(void)
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
timeout * 1000L,
WAIT_EVENT_ARCHIVER_MAIN);
if (rc & WL_TIMEOUT)
wakened = true;
if (rc & WL_POSTMASTER_DEATH)
time_to_stop = true;
}
else
wakened = true;
}
/*
@ -744,3 +689,15 @@ pgarch_archiveDone(char *xlog)
StatusFilePath(rlogdone, xlog, ".done");
(void) durable_rename(rlogready, rlogdone, WARNING);
}
/*
 * pgarch_die
 *
 * Exit-time cleanup handler, registered by PgArchiverMain() through
 * on_shmem_exit().  It withdraws the pgprocno the archiver advertised in
 * shared memory, so that PgArchWakeup() finds INVALID_PGPROCNO and does
 * not set a latch on the archiver's behalf.
 *
 * Neither "code" nor "arg" is used.
 */
static void
pgarch_die(int code, Datum arg)
{
PgArch->pgprocno = INVALID_PGPROCNO;
}

View File

@ -443,9 +443,10 @@ static void InitPostmasterDeathWatchHandle(void);
* even during recovery.
*/
#define PgArchStartupAllowed() \
((XLogArchivingActive() && pmState == PM_RUN) || \
(XLogArchivingAlways() && \
(pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
(((XLogArchivingActive() && pmState == PM_RUN) || \
(XLogArchivingAlways() && \
(pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY))) && \
PgArchCanRestart())
#ifdef EXEC_BACKEND
@ -548,6 +549,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
#endif /* EXEC_BACKEND */
#define StartupDataBase() StartChildProcess(StartupProcess)
#define StartArchiver() StartChildProcess(ArchiverProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess)
@ -1792,7 +1794,7 @@ ServerLoop(void)
/* If we have lost the archiver, try to start a new one. */
if (PgArchPID == 0 && PgArchStartupAllowed())
PgArchPID = pgarch_start();
PgArchPID = StartArchiver();
/* If we need to signal the autovacuum launcher, do so now */
if (avlauncher_needs_signal)
@ -3007,7 +3009,7 @@ reaper(SIGNAL_ARGS)
if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
AutoVacPID = StartAutoVacLauncher();
if (PgArchStartupAllowed() && PgArchPID == 0)
PgArchPID = pgarch_start();
PgArchPID = StartArchiver();
if (PgStatPID == 0)
PgStatPID = pgstat_start();
@ -3142,20 +3144,22 @@ reaper(SIGNAL_ARGS)
}
/*
* Was it the archiver? If so, just try to start a new one; no need
* to force reset of the rest of the system. (If fail, we'll try
* again in future cycles of the main loop.). Unless we were waiting
* for it to shut down; don't restart it in that case, and
* Was it the archiver? If exit status is zero (normal) or one (FATAL
* exit), we assume everything is all right just like normal backends
* and just try to restart a new one so that we immediately retry
* archiving remaining files. (If fail, we'll try again in future
* cycles of the postmaster's main loop.) Unless we were waiting for
* it to shut down; don't restart it in that case, and
* PostmasterStateMachine() will advance to the next shutdown step.
*/
if (pid == PgArchPID)
{
PgArchPID = 0;
if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("archiver process"),
pid, exitstatus);
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus,
_("archiver process"));
if (PgArchStartupAllowed())
PgArchPID = pgarch_start();
PgArchPID = StartArchiver();
continue;
}
@ -3403,7 +3407,7 @@ CleanupBackend(int pid,
/*
* HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
* walwriter, autovacuum, or background worker.
* walwriter, autovacuum, archiver or background worker.
*
* The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie.
@ -3609,19 +3613,16 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/*
* Force a power-cycle of the pgarch process too. (This isn't absolutely
* necessary, but it seems like a good idea for robustness, and it
* simplifies the state-machine logic in the case where a shutdown request
* arrives during crash processing.)
*/
if (PgArchPID != 0 && take_action)
/* Take care of the archiver too */
if (pid == PgArchPID)
PgArchPID = 0;
else if (PgArchPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
"SIGQUIT",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) PgArchPID)));
signal_child(PgArchPID, SIGQUIT);
signal_child(PgArchPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/*
@ -3804,12 +3805,11 @@ PostmasterStateMachine(void)
* (including autovac workers), no bgworkers (including unconnected
* ones), and no walwriter, autovac launcher or bgwriter. If we are
* doing crash recovery or an immediate shutdown then we expect the
* checkpointer to exit as well, otherwise not. The archiver, stats,
* and syslogger processes are disregarded since they are not
* connected to shared memory; we also disregard dead_end children
* here. Walsenders are also disregarded, they will be terminated
* later after writing the checkpoint record, like the archiver
* process.
* checkpointer to exit as well, otherwise not. The stats and
* syslogger processes are disregarded since they are not connected to
* shared memory; we also disregard dead_end children here. Walsenders
* and archiver are also disregarded, they will be terminated later
* after writing the checkpoint record.
*/
if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
StartupPID == 0 &&
@ -3912,6 +3912,7 @@ PostmasterStateMachine(void)
Assert(CheckpointerPID == 0);
Assert(WalWriterPID == 0);
Assert(AutoVacPID == 0);
Assert(PgArchPID == 0);
/* syslogger is not considered here */
pmState = PM_NO_CHILDREN;
}
@ -5037,12 +5038,6 @@ SubPostmasterMain(int argc, char *argv[])
StartBackgroundWorker();
}
if (strcmp(argv[1], "--forkarch") == 0)
{
/* Do not want to attach to shared memory */
PgArchiverMain(argc, argv); /* does not return */
}
if (strcmp(argv[1], "--forkcol") == 0)
{
/* Do not want to attach to shared memory */
@ -5140,7 +5135,7 @@ sigusr1_handler(SIGNAL_ARGS)
*/
Assert(PgArchPID == 0);
if (XLogArchivingAlways())
PgArchPID = pgarch_start();
PgArchPID = StartArchiver();
/*
* If we aren't planning to enter hot standby mode later, treat
@ -5194,16 +5189,6 @@ sigusr1_handler(SIGNAL_ARGS)
if (StartWorkerNeeded || HaveCrashedWorker)
maybe_start_bgworkers();
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
PgArchPID != 0)
{
/*
* Send SIGUSR1 to archiver process, to wake it up and begin archiving
* next WAL file.
*/
signal_child(PgArchPID, SIGUSR1);
}
/* Tell syslogger to rotate logfile if requested */
if (SysLoggerPID != 0)
{
@ -5445,6 +5430,10 @@ StartChildProcess(AuxProcType type)
ereport(LOG,
(errmsg("could not fork startup process: %m")));
break;
case ArchiverProcess:
ereport(LOG,
(errmsg("could not fork archiver process: %m")));
break;
case BgWriterProcess:
ereport(LOG,
(errmsg("could not fork background writer process: %m")));

View File

@ -144,6 +144,7 @@ CreateSharedMemoryAndSemaphores(void)
size = add_size(size, ReplicationOriginShmemSize());
size = add_size(size, WalSndShmemSize());
size = add_size(size, WalRcvShmemSize());
size = add_size(size, PgArchShmemSize());
size = add_size(size, ApplyLauncherShmemSize());
size = add_size(size, SnapMgrShmemSize());
size = add_size(size, BTreeShmemSize());
@ -258,6 +259,7 @@ CreateSharedMemoryAndSemaphores(void)
ReplicationOriginShmemInit();
WalSndShmemInit();
WalRcvShmemInit();
PgArchShmemInit();
ApplyLauncherShmemInit();
/*

View File

@ -417,6 +417,7 @@ typedef enum
BootstrapProcess,
StartupProcess,
BgWriterProcess,
ArchiverProcess,
CheckpointerProcess,
WalWriterProcess,
WalReceiverProcess,
@ -429,6 +430,7 @@ extern AuxProcType MyAuxProcType;
#define AmBootstrapProcess() (MyAuxProcType == BootstrapProcess)
#define AmStartupProcess() (MyAuxProcType == StartupProcess)
#define AmBackgroundWriterProcess() (MyAuxProcType == BgWriterProcess)
#define AmArchiverProcess() (MyAuxProcType == ArchiverProcess)
#define AmCheckpointerProcess() (MyAuxProcType == CheckpointerProcess)
#define AmWalWriterProcess() (MyAuxProcType == WalWriterProcess)
#define AmWalReceiverProcess() (MyAuxProcType == WalReceiverProcess)

View File

@ -26,14 +26,10 @@
#define MAX_XFN_CHARS 40
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup.partial"
/* ----------
* Functions called from postmaster
* ----------
*/
extern int pgarch_start(void);
#ifdef EXEC_BACKEND
extern void PgArchiverMain(int argc, char *argv[]) pg_attribute_noreturn();
#endif
extern Size PgArchShmemSize(void);
extern void PgArchShmemInit(void);
extern bool PgArchCanRestart(void);
extern void PgArchiverMain(void) pg_attribute_noreturn();
extern void PgArchWakeup(void);
#endif /* _PGARCH_H */

View File

@ -34,7 +34,6 @@ typedef enum
{
PMSIGNAL_RECOVERY_STARTED, /* recovery has started */
PMSIGNAL_BEGIN_HOT_STANDBY, /* begin Hot Standby */
PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */
PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */
PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */
PMSIGNAL_START_AUTOVAC_WORKER, /* start an autovacuum worker */

View File

@ -370,11 +370,11 @@ extern PGPROC *PreparedXactProcs;
* We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access.
*
* Background writer, checkpointer and WAL writer run during normal operation.
* Startup process and WAL receiver also consume 2 slots, but WAL writer is
* launched only after startup has exited, so we only need 4 slots.
* Background writer, checkpointer, WAL writer and archiver run during normal
* operation. Startup process and WAL receiver also consume 2 slots, but WAL
* writer is launched only after startup has exited, so we only need 5 slots.
*/
#define NUM_AUXILIARY_PROCS 4
#define NUM_AUXILIARY_PROCS 5
/* configurable options */
extern PGDLLIMPORT int DeadlockTimeout;

View File

@ -1572,6 +1572,7 @@ PGresAttValue
PGresParamDesc
PGresult
PGresult_data
PgArchData
PHANDLE
PLAINTREE
PLUID_AND_ATTRIBUTES