postgresql/src/backend/utils/misc/timeout.c

831 lines
24 KiB
C

/*-------------------------------------------------------------------------
*
* timeout.c
* Routines to multiplex SIGALRM interrupts for multiple timeout reasons.
*
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/utils/misc/timeout.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <sys/time.h>
#include "miscadmin.h"
#include "storage/latch.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
/*
* Data about any one timeout reason.
*
* One of these exists for every slot of all_timeouts[]. While a timeout is
* scheduled, active_timeouts[] additionally holds a pointer to its entry.
*/
typedef struct timeout_params
{
/* InitializeTimeouts() sets this to the entry's own slot number in all_timeouts[] */
TimeoutId index; /* identifier of timeout reason */
/* volatile because these may be changed from the signal handler */
/* set by insert_timeout(), cleared by remove_timeout_index() */
volatile bool active; /* true if timeout is in active_timeouts[] */
/* set by handle_sig_alarm() when it fires the timeout; cleared on (re-)enable and, optionally, on disable */
volatile bool indicator; /* true if timeout has occurred */
/* callback function for timeout, or NULL if timeout not registered */
timeout_handler_proc timeout_handler;
/* the "now" value that was passed to enable_timeout() */
TimestampTz start_time; /* time that timeout was last activated */
TimestampTz fin_time; /* time it is, or was last, due to fire */
/* when > 0, handle_sig_alarm() re-enables the timeout after each firing */
int interval_in_ms; /* time between firings, or 0 if just once */
} timeout_params;
/*
* List of possible timeout reasons in the order of enum TimeoutId.
* InitializeTimeouts() stores each slot's own position in its "index" field,
* so all_timeouts[id] is the entry describing timeout reason "id".
*/
static timeout_params all_timeouts[MAX_TIMEOUTS];
/* Becomes true once InitializeTimeouts() has set up all_timeouts[]. */
static bool all_timeouts_initialized = false;
/*
* List of active timeouts ordered by their fin_time and priority.
* "Priority" means TimeoutId order: enable_timeout() places a new entry
* ahead of any existing entry that has the same fin_time and a larger index.
* Only the first num_active_timeouts slots of active_timeouts[] are in use.
* This list is subject to change by the interrupt handler, so it's volatile.
*/
static volatile int num_active_timeouts = 0;
static timeout_params *volatile active_timeouts[MAX_TIMEOUTS];
/*
* Flag controlling whether the signal handler is allowed to do anything.
* This is useful to avoid race conditions with the handler. Note in
* particular that this lets us make changes in the data structures without
* tediously disabling and re-enabling the timer signal. Most of the time,
* no interrupt would happen anyway during such critical sections, but if
* one does, this rule ensures it's safe. Leaving the signal enabled across
* multiple operations can greatly reduce the number of kernel calls we make,
* too. See comments in schedule_alarm() about that.
*
* We leave this "false" when we're not expecting interrupts, just in case.
*
* disable_alarm() and enable_alarm() merely assign to this flag; neither
* one makes a kernel call or touches the timer itself.
*/
static volatile sig_atomic_t alarm_enabled = false;
#define disable_alarm() (alarm_enabled = false)
#define enable_alarm() (alarm_enabled = true)
/*
* State recording if and when we next expect the interrupt to fire.
* (signal_due_at is valid only when signal_pending is true.)
* schedule_alarm() sets signal_due_at to the fin_time of the timeout for
* which it requests the interrupt, and sets signal_pending to true.
* Note that the signal handler will unconditionally reset signal_pending to
* false, so that can change asynchronously even when alarm_enabled is false.
*/
static volatile sig_atomic_t signal_pending = false;
static volatile TimestampTz signal_due_at = 0;
/*****************************************************************************
* Internal helper functions
*
* For all of these, it is caller's responsibility to protect them from
* interruption by the signal handler. Generally, call disable_alarm()
* first to prevent interruption, then update state, and last call
* schedule_alarm(), which will re-enable the signal handler if needed.
*****************************************************************************/
/*
 * Locate a timeout reason within active_timeouts[].
 *
 * Returns the array position of the first active entry whose "index" field
 * equals "id", or -1 when none of the num_active_timeouts entries matches.
 */
static int
find_active_timeout(TimeoutId id)
{
    int         pos = 0;

    while (pos < num_active_timeouts)
    {
        if (active_timeouts[pos]->index == id)
            return pos;
        pos++;
    }

    /* Walked the whole active list without finding it. */
    return -1;
}
/*
 * Put timeout reason "id" into active_timeouts[] at position "index",
 * moving every entry at or after that position up by one slot.
 *
 * "index" may be anything from 0 through num_active_timeouts (the latter
 * appends to the list); any other value is reported via elog(FATAL).
 * The timeout must not already be marked active.
 */
static void
insert_timeout(TimeoutId id, int index)
{
    int         slot;

    if (!(index >= 0 && index <= num_active_timeouts))
        elog(FATAL, "timeout index %d out of range 0..%d", index,
             num_active_timeouts);

    Assert(!all_timeouts[id].active);
    all_timeouts[id].active = true;

    /* Open a gap at "index": shift the tail upward, last entry first. */
    for (slot = num_active_timeouts; slot > index; slot--)
        active_timeouts[slot] = active_timeouts[slot - 1];

    active_timeouts[index] = &all_timeouts[id];
    num_active_timeouts++;
}
/*
 * Drop the entry at position "index" from active_timeouts[].
 *
 * The entry is flagged inactive and all later entries slide down one slot.
 * "index" must lie in 0 .. num_active_timeouts - 1; anything else is
 * reported via elog(FATAL).
 */
static void
remove_timeout_index(int index)
{
    int         src;

    if (!(index >= 0 && index < num_active_timeouts))
        elog(FATAL, "timeout index %d out of range 0..%d", index,
             num_active_timeouts - 1);

    Assert(active_timeouts[index]->active);
    active_timeouts[index]->active = false;

    /* Close the gap by copying each later entry into the slot before it. */
    src = index + 1;
    while (src < num_active_timeouts)
    {
        active_timeouts[src - 1] = active_timeouts[src];
        src++;
    }

    num_active_timeouts--;
}
/*
 * Activate (or re-activate) one timeout reason.
 *
 *  id              timeout reason; it must have a registered handler
 *  now             recorded as the timeout's start_time
 *  fin_time        moment at which the timeout is due to fire
 *  interval_in_ms  repeat interval, or 0 for a one-shot timeout
 *
 * The timeout's fired-indicator is cleared.  This only edits the queue; the
 * caller remains responsible for calling schedule_alarm() afterwards.
 */
static void
enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time,
               int interval_in_ms)
{
    timeout_params *entry = &all_timeouts[id];
    int         pos;

    /* Sanity-check the request */
    Assert(all_timeouts_initialized);
    Assert(all_timeouts[id].timeout_handler != NULL);

    /*
     * A call for an already-active timeout is taken as a request to
     * reschedule it, so pull the old queue entry out first.
     */
    if (entry->active)
        remove_timeout_index(find_active_timeout(id));

    /*
     * Work out the queue position.  The queue is kept sorted by fin_time;
     * entries with identical fin_time are ordered by ascending TimeoutId.
     */
    pos = 0;
    while (pos < num_active_timeouts)
    {
        timeout_params *queued = active_timeouts[pos];

        if (fin_time < queued->fin_time ||
            (fin_time == queued->fin_time && id < queued->index))
            break;
        pos++;
    }

    /* Record the new schedule, then link the entry into the queue. */
    entry->indicator = false;
    entry->start_time = now;
    entry->fin_time = fin_time;
    entry->interval_in_ms = interval_in_ms;

    insert_timeout(id, pos);
}
/*
* Schedule alarm for the next active timeout, if any
*
* "now" is the caller's idea of the current time.
*
* If no timeouts are active this does nothing at all; in particular it
* does not call enable_alarm(). Otherwise it calls enable_alarm(), and
* then, unless an interrupt that is due no later than the nearest timeout
* is already believed to be pending, it records the new due time in
* signal_due_at/signal_pending and requests the interrupt with setitimer().
* A setitimer() failure is reported with elog(FATAL).
*
* We assume the caller has obtained the current time, or a close-enough
* approximation. (It's okay if a tick or two has passed since "now", or
* if a little more time elapses before we reach the kernel call; that will
* cause us to ask for an interrupt a tick or two later than the nearest
* timeout, which is no big deal. Passing a "now" value that's in the future
* would be bad though.)
*/
static void
schedule_alarm(TimestampTz now)
{
if (num_active_timeouts > 0)
{
struct itimerval timeval;
TimestampTz nearest_timeout;
long secs;
int usecs;
/*
* Start from an all-zeroes request. Only it_value is filled in below;
* it_interval stays zero, which per setitimer() semantics means the
* timer is not automatically re-armed after it expires.
*/
MemSet(&timeval, 0, sizeof(struct itimerval));
/*
* If we think there's a signal pending, but current time is more than
* 10ms past when the signal was due, then assume that the timeout
* request got lost somehow; clear signal_pending so that we'll reset
* the interrupt request below. (10ms corresponds to the worst-case
* timeout granularity on modern systems.) It won't hurt us if the
* interrupt does manage to fire between now and when we reach the
* setitimer() call.
*/
if (signal_pending && now > signal_due_at + 10 * 1000)
signal_pending = false;
/*
* Get the time remaining till the nearest pending timeout. If it is
* negative, assume that we somehow missed an interrupt, and clear
* signal_pending. This gives us another chance to recover if the
* kernel drops a timeout request for some reason.
*/
/* The queue is sorted, so element 0 is the timeout that is due first. */
nearest_timeout = active_timeouts[0]->fin_time;
if (now > nearest_timeout)
{
signal_pending = false;
/* force an interrupt as soon as possible */
secs = 0;
usecs = 1;
}
else
{
TimestampDifference(now, nearest_timeout,
&secs, &usecs);
/*
* It's possible that the difference is less than a microsecond;
* ensure we don't cancel, rather than set, the interrupt.
*/
if (secs == 0 && usecs == 0)
usecs = 1;
}
timeval.it_value.tv_sec = secs;
timeval.it_value.tv_usec = usecs;
/*
* We must enable the signal handler before calling setitimer(); if we
* did it in the other order, we'd have a race condition wherein the
* interrupt could occur before we can set alarm_enabled, so that the
* signal handler would fail to do anything.
*
* Because we didn't bother to disable the timer in disable_alarm(),
* it's possible that a previously-set interrupt will fire between
* enable_alarm() and setitimer(). This is safe, however. There are
* two possible outcomes:
*
* 1. The signal handler finds nothing to do (because the nearest
* timeout event is still in the future). It will re-set the timer
* and return. Then we'll overwrite the timer value with a new one.
* This will mean that the timer fires a little later than we
* intended, but only by the amount of time it takes for the signal
* handler to do nothing useful, which shouldn't be much.
*
* 2. The signal handler executes and removes one or more timeout
* events. When it returns, either the queue is now empty or the
* frontmost event is later than the one we looked at above. So we'll
* overwrite the timer value with one that is too soon (plus or minus
* the signal handler's execution time), causing a useless interrupt
* to occur. But the handler will then re-set the timer and
* everything will still work as expected.
*
* Since these cases are of very low probability (the window here
* being quite narrow), it's not worth adding cycles to the mainline
* code to prevent occasional wasted interrupts.
*/
enable_alarm();
/*
* If there is already an interrupt pending that's at or before the
* needed time, we need not do anything more. The signal handler will
* do the right thing in the first case, and re-schedule the interrupt
* for later in the second case. It might seem that the extra
* interrupt is wasted work, but it's not terribly much work, and this
* method has very significant advantages in the common use-case where
* we repeatedly set a timeout that we don't expect to reach and then
* cancel it. Instead of invoking setitimer() every time the timeout
* is set or canceled, we perform one interrupt and a re-scheduling
* setitimer() call at intervals roughly equal to the timeout delay.
* For example, with statement_timeout = 1s and a throughput of
* thousands of queries per second, this method requires an interrupt
* and setitimer() call roughly once a second, rather than thousands
* of setitimer() calls per second.
*
* Because of the possible passage of time between when we obtained
* "now" and when we reach setitimer(), the kernel's opinion of when
* to trigger the interrupt is likely to be a bit later than
* signal_due_at. That's fine, for the same reasons described above.
*/
if (signal_pending && nearest_timeout >= signal_due_at)
return;
/*
* As with calling enable_alarm(), we must set signal_pending *before*
* calling setitimer(); if we did it after, the signal handler could
* trigger before we set it, leaving us with a false opinion that a
* signal is still coming.
*
* Other race conditions involved with setting/checking signal_pending
* are okay, for the reasons described above. One additional point is
* that the signal handler could fire after we set signal_due_at, but
* still before the setitimer() call. Then the handler could
* overwrite signal_due_at with a value it computes, which will be the
* same as or perhaps later than what we just computed. After we
* perform setitimer(), the net effect would be that signal_due_at
* gives a time later than when the interrupt will really happen;
* which is a safe situation.
*/
signal_due_at = nearest_timeout;
signal_pending = true;
/* Set the alarm timer */
if (setitimer(ITIMER_REAL, &timeval, NULL) != 0)
{
/*
* Clearing signal_pending here is a bit pro forma, but not
* entirely so, since something in the FATAL exit path could try
* to use timeout facilities.
*/
signal_pending = false;
elog(FATAL, "could not enable SIGALRM timer: %m");
}
}
}
/*****************************************************************************
* Signal handler
*****************************************************************************/
/*
* Signal handler for SIGALRM
*
* Process any active timeout reasons and then reschedule the interrupt
* as needed.
*
* Regardless of alarm_enabled, the handler always sets the process latch
* and clears signal_pending. Timeouts are examined only when alarm_enabled
* is true. In that case every queued timeout whose fin_time has been
* reached is removed from the queue, has its indicator set, and has its
* handler called; a timeout with interval_in_ms > 0 is then queued again.
* Finally schedule_alarm() is called to request the next interrupt.
*
* Note that if alarm_enabled is true but the queue is empty, the handler
* leaves alarm_enabled false, since schedule_alarm() is not reached.
*/
static void
handle_sig_alarm(SIGNAL_ARGS)
{
/*
* Bump the holdoff counter, to make sure nothing we call will process
* interrupts directly. No timeout handler should do that, but these
* failures are hard to debug, so better be sure.
*/
HOLD_INTERRUPTS();
/*
* SIGALRM is always cause for waking anything waiting on the process
* latch.
*/
SetLatch(MyLatch);
/*
* Always reset signal_pending, even if !alarm_enabled, since indeed no
* signal is now pending.
*/
signal_pending = false;
/*
* Fire any pending timeouts, but only if we're enabled to do so.
*/
if (alarm_enabled)
{
/*
* Disable alarms, just in case this platform allows signal handlers
* to interrupt themselves. schedule_alarm() will re-enable if
* appropriate.
*/
disable_alarm();
if (num_active_timeouts > 0)
{
TimestampTz now = GetCurrentTimestamp();
/* While the first pending timeout has been reached ... */
while (num_active_timeouts > 0 &&
now >= active_timeouts[0]->fin_time)
{
timeout_params *this_timeout = active_timeouts[0];
/* Remove it from the active list */
remove_timeout_index(0);
/* Mark it as fired */
this_timeout->indicator = true;
/* And call its handler function */
this_timeout->timeout_handler();
/* If it should fire repeatedly, re-enable it. */
if (this_timeout->interval_in_ms > 0)
{
TimestampTz new_fin_time;
/*
* To guard against drift, schedule the next instance of
* the timeout based on the intended firing time rather
* than the actual firing time. But if the timeout was so
* late that we missed an entire cycle, fall back to
* scheduling based on the actual firing time.
*/
new_fin_time =
TimestampTzPlusMilliseconds(this_timeout->fin_time,
this_timeout->interval_in_ms);
if (new_fin_time < now)
new_fin_time =
TimestampTzPlusMilliseconds(now,
this_timeout->interval_in_ms);
/*
* enable_timeout() clears the indicator that was set just
* above, since it treats this as a fresh activation.
*/
enable_timeout(this_timeout->index, now, new_fin_time,
this_timeout->interval_in_ms);
}
/*
* The handler might not take negligible time (CheckDeadLock
* for instance isn't too cheap), so let's update our idea of
* "now" after each one.
*/
now = GetCurrentTimestamp();
}
/* Done firing timeouts, so reschedule next interrupt if any */
schedule_alarm(now);
}
}
/* Undo the HOLD_INTERRUPTS() done on entry. */
RESUME_INTERRUPTS();
}
/*****************************************************************************
* Public API
*****************************************************************************/
/*
 * Set up the timeout module for this process.
 *
 * Every process that intends to use timeouts has to call this.  It resets
 * all module state (no timeout active, no handler registered) and then
 * installs handle_sig_alarm as the SIGALRM handler.
 *
 * A process forked from a parent that was already using this module must
 * call this before signals are re-enabled; otherwise handlers intended for
 * the parent could end up being run in the child.
 */
void
InitializeTimeouts(void)
{
    /* Start (or start over) from a clean slate */
    disable_alarm();
    num_active_timeouts = 0;

    for (int slot = 0; slot < MAX_TIMEOUTS; slot++)
    {
        /* Each entry is identified by its own array position. */
        all_timeouts[slot].index = slot;
        all_timeouts[slot].active = false;
        all_timeouts[slot].indicator = false;
        all_timeouts[slot].timeout_handler = NULL;
        all_timeouts[slot].start_time = 0;
        all_timeouts[slot].fin_time = 0;
        all_timeouts[slot].interval_in_ms = 0;
    }

    all_timeouts_initialized = true;

    /* State is consistent now, so the handler may be installed */
    pqsignal(SIGALRM, handle_sig_alarm);
}
/*
 * Attach a handler function to a timeout reason.
 *
 * With a predefined id (below USER_TIMEOUT) the handler is simply stored in
 * that slot.  With id >= USER_TIMEOUT the first slot in the range
 * USER_TIMEOUT .. MAX_TIMEOUTS - 1 that has no handler yet is claimed; if
 * there is none, a FATAL error is raised.
 *
 * Returns the id under which the handler was registered.
 */
TimeoutId
RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
{
    Assert(all_timeouts_initialized);

    /* There's no need to disable the signal handler here. */

    if (id >= USER_TIMEOUT)
    {
        /* Search the user-defined range for an unclaimed slot */
        id = USER_TIMEOUT;
        while (id < MAX_TIMEOUTS &&
               all_timeouts[id].timeout_handler != NULL)
            id++;

        if (id >= MAX_TIMEOUTS)
            ereport(FATAL,
                    (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                     errmsg("cannot add more timeout reasons")));
    }

    /* A slot can be registered only once */
    Assert(all_timeouts[id].timeout_handler == NULL);
    all_timeouts[id].timeout_handler = handler;

    return id;
}
/*
 * Re-request the SIGALRM interrupt for whatever timeouts are still active.
 *
 * Intended for error recovery, where a query cancel may have caused a
 * SIGALRM event to be lost (by longjmp'ing out of handle_sig_alarm before
 * it got anything done).  Calling this is unnecessary when one of the
 * public enable_ or disable_timeout functions runs in the same area,
 * because those call schedule_alarm() themselves when needed.
 *
 * This is a no-op if the module has not been initialized yet.
 */
void
reschedule_timeouts(void)
{
    /* Tolerate being called before InitializeTimeouts() */
    if (all_timeouts_initialized)
    {
        /* Hold off the signal handler while we look at the queue */
        disable_alarm();

        /* Nothing to request unless something is still queued */
        if (num_active_timeouts > 0)
            schedule_alarm(GetCurrentTimestamp());
    }
}
/*
 * Arm timeout "id" to fire once, delay_ms milliseconds from now.
 *
 * If the timeout was already active it is rescheduled.
 */
void
enable_timeout_after(TimeoutId id, int delay_ms)
{
    TimestampTz start;

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    /* One clock reading serves as both start time and base for the deadline */
    start = GetCurrentTimestamp();
    enable_timeout(id, start,
                   TimestampTzPlusMilliseconds(start, delay_ms),
                   0);

    /* Request the interrupt; this re-enables the handler as well */
    schedule_alarm(start);
}
/*
 * Arm timeout "id" to fire repeatedly.
 *
 * fin_time is the moment of the first firing; delay_ms is the time between
 * subsequent firings, in milliseconds.
 *
 * If the timeout was already active it is rescheduled.
 */
void
enable_timeout_every(TimeoutId id, TimestampTz fin_time, int delay_ms)
{
    TimestampTz start;

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    /* Queue it, recording the current time as its start time */
    start = GetCurrentTimestamp();
    enable_timeout(id, start, fin_time, delay_ms);

    /* Request the interrupt; this re-enables the handler as well */
    schedule_alarm(start);
}
/*
 * Arm timeout "id" to fire once, at the absolute time fin_time.
 *
 * This exists for callers that need to compute the deadline relative to
 * some reference point other than "now".  Callers without such a need
 * should prefer enable_timeout_after(), which avoids a second
 * GetCurrentTimestamp() call.
 */
void
enable_timeout_at(TimeoutId id, TimestampTz fin_time)
{
    TimestampTz start;

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    /* Queue it, recording the current time as its start time */
    start = GetCurrentTimestamp();
    enable_timeout(id, start, fin_time, 0);

    /* Request the interrupt; this re-enables the handler as well */
    schedule_alarm(start);
}
/*
 * Arm several timeouts in one go.
 *
 * "timeouts" points to "count" requests.  Each is handled according to its
 * type:
 *  TMPARAM_AFTER   fire once, delay_ms milliseconds from now
 *  TMPARAM_AT      fire once, at fin_time
 *  TMPARAM_EVERY   fire delay_ms milliseconds from now, then every delay_ms
 * Any other type raises an ERROR.
 *
 * Compared with individual enable_timeout_xxx() calls, this needs just one
 * GetCurrentTimestamp() call and one schedule_alarm() call for the batch.
 */
void
enable_timeouts(const EnableTimeoutParams *timeouts, int count)
{
    TimestampTz now;

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    /* A single clock reading is shared by every request */
    now = GetCurrentTimestamp();

    for (int k = 0; k < count; k++)
    {
        const EnableTimeoutParams *req = &timeouts[k];

        if (req->type == TMPARAM_AFTER)
        {
            enable_timeout(req->id, now,
                           TimestampTzPlusMilliseconds(now, req->delay_ms),
                           0);
        }
        else if (req->type == TMPARAM_AT)
        {
            enable_timeout(req->id, now, req->fin_time, 0);
        }
        else if (req->type == TMPARAM_EVERY)
        {
            enable_timeout(req->id, now,
                           TimestampTzPlusMilliseconds(now, req->delay_ms),
                           req->delay_ms);
        }
        else
        {
            elog(ERROR, "unrecognized timeout type %d",
                 (int) req->type);
        }
    }

    /* Request the interrupt; this re-enables the handler as well */
    schedule_alarm(now);
}
/*
 * Cancel one timeout.
 *
 * The timeout is taken off the active list if it is on it; cancelling a
 * timeout that is not enabled is not an error.  Its "has fired" indicator
 * is cleared unless keep_indicator is true.  Other active timeouts are
 * unaffected and the interrupt is rescheduled for them.  If no timeout
 * remains active, schedule_alarm() is not called and the signal handler is
 * left disabled.
 */
void
disable_timeout(TimeoutId id, bool keep_indicator)
{
    /* Sanity-check the request */
    Assert(all_timeouts_initialized);
    Assert(all_timeouts[id].timeout_handler != NULL);

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    /* Unlink it from the active list, if present */
    if (all_timeouts[id].active)
    {
        int         pos = find_active_timeout(id);

        remove_timeout_index(pos);
    }

    /* Clear the fired-indicator on request, whether or not it was active */
    if (!keep_indicator)
        all_timeouts[id].indicator = false;

    /* Something still queued?  Then make sure an interrupt is on its way */
    if (num_active_timeouts > 0)
        schedule_alarm(GetCurrentTimestamp());
}
/*
 * Cancel several timeouts in one go.
 *
 * "timeouts" points to "count" requests.  For each, the named timeout is
 * taken off the active list if it is on it, and its "has fired" indicator
 * is cleared unless that request's keep_indicator is true.
 *
 * The effect matches a series of disable_timeout() calls, but at most one
 * GetCurrentTimestamp() call and one schedule_alarm() call are made.
 */
void
disable_timeouts(const DisableTimeoutParams *timeouts, int count)
{
    Assert(all_timeouts_initialized);

    /* Keep the signal handler away from the queue while we edit it */
    disable_alarm();

    for (int k = 0; k < count; k++)
    {
        const DisableTimeoutParams *req = &timeouts[k];
        TimeoutId   id = req->id;

        Assert(all_timeouts[id].timeout_handler != NULL);

        /* Unlink it from the active list, if present */
        if (all_timeouts[id].active)
        {
            int         pos = find_active_timeout(id);

            remove_timeout_index(pos);
        }

        /* Clear the fired-indicator unless asked to preserve it */
        if (!req->keep_indicator)
            all_timeouts[id].indicator = false;
    }

    /* Something still queued?  Then make sure an interrupt is on its way */
    if (num_active_timeouts > 0)
        schedule_alarm(GetCurrentTimestamp());
}
/*
 * Cancel every timeout.
 *
 * The signal handler is disabled, the active list is emptied, and every
 * timeout is flagged inactive.  The "has fired" indicators are cleared too,
 * unless keep_indicators is true.
 */
void
disable_all_timeouts(bool keep_indicators)
{
    disable_alarm();

    /*
     * The timer interrupt itself is deliberately left alone.  We used to
     * turn it off here, but in common usage patterns leaving it set is
     * cheaper, since that may save us from having to set it again shortly.
     * See comments in schedule_alarm().
     */

    num_active_timeouts = 0;

    for (int slot = 0; slot < MAX_TIMEOUTS; slot++)
    {
        all_timeouts[slot].active = false;

        if (keep_indicators)
            continue;
        all_timeouts[slot].indicator = false;
    }
}
/*
 * Report whether timeout "id" is currently active, i.e. enabled and not
 * yet fired.
 *
 * The answer is inherently racy: the timeout might fire right after the
 * flag has been read.
 */
bool
get_timeout_active(TimeoutId id)
{
    bool        is_active = all_timeouts[id].active;

    return is_active;
}
/*
 * Report whether timeout "id" has fired, per its "has fired" indicator.
 *
 * When the indicator is set and reset_indicator is true, the indicator is
 * cleared before true is returned.  When false is returned the indicator
 * is never written to, so that a timeout firing concurrently cannot be
 * missed.
 */
bool
get_timeout_indicator(TimeoutId id, bool reset_indicator)
{
    /* Not fired: report that without touching the flag */
    if (!all_timeouts[id].indicator)
        return false;

    if (reset_indicator)
        all_timeouts[id].indicator = false;

    return true;
}
/*
 * Fetch the time at which timeout "id" was most recently activated.
 *
 * The result is 0 if the timeout has never been activated in this process.
 * The stored value is intentionally *not* reset when the timeout fires;
 * doing so would create a race condition with code that is about to fetch
 * the value just as SIGALRM arrives.
 */
TimestampTz
get_timeout_start_time(TimeoutId id)
{
    TimestampTz activated_at = all_timeouts[id].start_time;

    return activated_at;
}
/*
 * Fetch the time at which timeout "id" is due to fire, or was due at its
 * most recent activation.
 *
 * The result is 0 if the timeout has never been activated in this process.
 * The stored value is intentionally *not* reset when the timeout fires;
 * doing so would create a race condition with code that is about to fetch
 * the value just as SIGALRM arrives.
 */
TimestampTz
get_timeout_finish_time(TimeoutId id)
{
    TimestampTz due_at = all_timeouts[id].fin_time;

    return due_at;
}