Summary of this patch (text before the first "diff --git" line is ignored by patch tools):

  * checkpointer.c: drops the process-local checkpoint_requested flag.  The
    main loop and ImmediateCheckpointRequested() now read ckpt_flags in
    shared memory directly; ReqCheckpointHandler() only sets the latch.
  * RequestCheckpoint() ORs CHECKPOINT_REQUESTED into ckpt_flags, retries
    signalling up to MAX_SIGNAL_TRIES (600) times when CHECKPOINT_WAIT is
    set, and otherwise gives up after the first failed attempt with a LOG.
  * xlog.h: adds CHECKPOINT_REQUESTED (0x0100).

NOTE(review): line structure was restored from a whitespace-collapsed copy;
hunk line counts were re-checked, but leading indentation is reconstructed.
If the hunks do not apply cleanly, retry with whitespace-insensitive
matching (patch -l, or git apply --ignore-whitespace).

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index b54448017d..fb912c0381 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -150,7 +150,6 @@ double CheckPointCompletionTarget = 0.5;
  * Flags set by interrupt handlers for later service in the main loop.
  */
 static volatile sig_atomic_t got_SIGHUP = false;
-static volatile sig_atomic_t checkpoint_requested = false;
 static volatile sig_atomic_t shutdown_requested = false;
 
 /*
@@ -382,12 +381,6 @@ CheckpointerMain(void)
 			 */
 			UpdateSharedMemoryConfig();
 		}
-		if (checkpoint_requested)
-		{
-			checkpoint_requested = false;
-			do_checkpoint = true;
-			BgWriterStats.m_requested_checkpoints++;
-		}
 		if (shutdown_requested)
 		{
 			/*
@@ -401,6 +394,17 @@ CheckpointerMain(void)
 			proc_exit(0); /* done */
 		}
 
+		/*
+		 * Detect a pending checkpoint request by checking whether the flags
+		 * word in shared memory is nonzero. We shouldn't need to acquire the
+		 * ckpt_lck for this.
+		 */
+		if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags)
+		{
+			do_checkpoint = true;
+			BgWriterStats.m_requested_checkpoints++;
+		}
+
 		/*
 		 * Force a checkpoint if too much time has elapsed since the last one.
 		 * Note that we count a timed checkpoint in stats only when this
@@ -645,17 +649,14 @@ CheckArchiveTimeout(void)
 static bool
 ImmediateCheckpointRequested(void)
 {
-	if (checkpoint_requested)
-	{
-		volatile CheckpointerShmemStruct *cps = CheckpointerShmem;
+	volatile CheckpointerShmemStruct *cps = CheckpointerShmem;
 
-		/*
-		 * We don't need to acquire the ckpt_lck in this case because we're
-		 * only looking at a single flag bit.
-		 */
-		if (cps->ckpt_flags & CHECKPOINT_IMMEDIATE)
-			return true;
-	}
+	/*
+	 * We don't need to acquire the ckpt_lck in this case because we're only
+	 * looking at a single flag bit.
+	 */
+	if (cps->ckpt_flags & CHECKPOINT_IMMEDIATE)
+		return true;
 
 	return false;
 }
@@ -858,7 +859,10 @@ ReqCheckpointHandler(SIGNAL_ARGS)
 {
 	int save_errno = errno;
 
-	checkpoint_requested = true;
+	/*
+	 * The signalling process should have set ckpt_flags nonzero, so all we
+	 * need do is ensure that our main loop gets kicked out of any wait.
+	 */
 	SetLatch(MyLatch);
 
 	errno = save_errno;
@@ -997,31 +1001,35 @@ RequestCheckpoint(int flags)
 
 	old_failed = CheckpointerShmem->ckpt_failed;
 	old_started = CheckpointerShmem->ckpt_started;
-	CheckpointerShmem->ckpt_flags |= flags;
+	CheckpointerShmem->ckpt_flags |= (flags | CHECKPOINT_REQUESTED);
 
 	SpinLockRelease(&CheckpointerShmem->ckpt_lck);
 
 	/*
 	 * Send signal to request checkpoint. It's possible that the checkpointer
 	 * hasn't started yet, or is in process of restarting, so we will retry a
-	 * few times if needed. Also, if not told to wait for the checkpoint to
-	 * occur, we consider failure to send the signal to be nonfatal and merely
-	 * LOG it.
+	 * few times if needed. (Actually, more than a few times, since on slow
+	 * or overloaded buildfarm machines, it's been observed that the
+	 * checkpointer can take several seconds to start.) However, if not told
+	 * to wait for the checkpoint to occur, we consider failure to send the
+	 * signal to be nonfatal and merely LOG it. The checkpointer should see
+	 * the request when it does start, with or without getting a signal.
 	 */
+#define MAX_SIGNAL_TRIES 600 /* max wait 60.0 sec */
 	for (ntries = 0;; ntries++)
 	{
 		if (CheckpointerShmem->checkpointer_pid == 0)
 		{
-			if (ntries >= 20) /* max wait 2.0 sec */
+			if (ntries >= MAX_SIGNAL_TRIES || !(flags & CHECKPOINT_WAIT))
 			{
 				elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
-					 "could not request checkpoint because checkpointer not running");
+					 "could not signal for checkpoint: checkpointer is not running");
 				break;
 			}
 		}
 		else if (kill(CheckpointerShmem->checkpointer_pid, SIGINT) != 0)
 		{
-			if (ntries >= 20) /* max wait 2.0 sec */
+			if (ntries >= MAX_SIGNAL_TRIES || !(flags & CHECKPOINT_WAIT))
 			{
 				elog((flags & CHECKPOINT_WAIT) ? ERROR : LOG,
 					 "could not signal for checkpoint: %m");
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..ef6aabde9f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -185,6 +185,8 @@ extern bool XLOG_DEBUG;
 /* These indicate the cause of a checkpoint request */
 #define CHECKPOINT_CAUSE_XLOG 0x0040 /* XLOG consumption */
 #define CHECKPOINT_CAUSE_TIME 0x0080 /* Elapsed time */
+/* We set this to ensure that ckpt_flags is not 0 if a request has been made */
+#define CHECKPOINT_REQUESTED 0x0100 /* Checkpoint request has been made */
 
 /*
  * Flag bits for the record being inserted, set using XLogSetRecordFlags().