postgresql/src/include/miscadmin.h

467 lines
16 KiB
C
Raw Normal View History

1996-10-31 08:10:14 +01:00
/*-------------------------------------------------------------------------
*
* miscadmin.h
* This file contains general postgres administration and initialization
* stuff that used to be spread out between the following files:
* globals.h global variables
* pdir.h directory path crud
* pinit.h postgres initialization
* pmod.h processing modes
* Over time, this has also become the preferred place for widely known
* resource-limitation stuff, such as work_mem and check_stack_depth().
1996-10-31 08:10:14 +01:00
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
1996-10-31 08:10:14 +01:00
*
2010-09-20 22:08:53 +02:00
* src/include/miscadmin.h
1996-10-31 08:10:14 +01:00
*
* NOTES
* some of the information in this file should be moved to other files.
1996-10-31 08:10:14 +01:00
*
*-------------------------------------------------------------------------
*/
#ifndef MISCADMIN_H
#define MISCADMIN_H
#include "pgtime.h" /* for pg_time_t */
#define PG_BACKEND_VERSIONSTR "postgres (PostgreSQL) " PG_VERSION "\n"
#define InvalidPid (-1)
/*****************************************************************************
2001-03-22 05:01:46 +01:00
* System interrupt and critical section handling
*
* There are two types of interrupts that a running backend needs to accept
* without messing up its state: QueryCancel (SIGINT) and ProcDie (SIGTERM).
* In both cases, we need to be able to clean up the current transaction
* gracefully, so we can't respond to the interrupt instantaneously ---
* there's no guarantee that internal data structures would be self-consistent
* if the code is interrupted at an arbitrary instant. Instead, the signal
* handlers set flags that are checked periodically during execution.
*
* The CHECK_FOR_INTERRUPTS() macro is called at strategically located spots
* where it is normally safe to accept a cancel or die interrupt. In some
* cases, we invoke CHECK_FOR_INTERRUPTS() inside low-level subroutines that
* might sometimes be called in contexts that do *not* want to allow a cancel
* or die interrupt. The HOLD_INTERRUPTS() and RESUME_INTERRUPTS() macros
* allow code to ensure that no cancel or die interrupt will be accepted,
* even if CHECK_FOR_INTERRUPTS() gets called in a subroutine. The interrupt
* will be held off until CHECK_FOR_INTERRUPTS() is done outside any
* HOLD_INTERRUPTS() ... RESUME_INTERRUPTS() section.
*
Be more careful to not lose sync in the FE/BE protocol. If any error occurred while we were in the middle of reading a protocol message from the client, we could lose sync, and incorrectly try to interpret a part of another message as a new protocol message. That will usually lead to an "invalid frontend message" error that terminates the connection. However, this is a security issue because an attacker might be able to deliberately cause an error, inject a Query message in what's supposed to be just user data, and have the server execute it. We were quite careful to not have CHECK_FOR_INTERRUPTS() calls or other operations that could ereport(ERROR) in the middle of processing a message, but a query cancel interrupt or statement timeout could nevertheless cause it to happen. Also, the V2 fastpath and COPY handling were not so careful. It's very difficult to recover in the V2 COPY protocol, so we will just terminate the connection on error. In practice, that's what happened previously anyway, as we lost protocol sync. To fix, add a new variable in pqcomm.c, PqCommReadingMsg, that is set whenever we're in the middle of reading a message. When it's set, we cannot safely ERROR out and continue running, because we might've read only part of a message. PqCommReadingMsg acts somewhat similarly to critical sections in that if an error occurs while it's set, the error handler will force the connection to be terminated, as if the error was FATAL. It's not implemented by promoting ERROR to FATAL in elog.c, like ERROR is promoted to PANIC in critical sections, because we want to be able to use PG_TRY/CATCH to recover and regain protocol sync. pq_getmessage() takes advantage of that to prevent an OOM error from terminating the connection. To prevent unnecessary connection terminations, add a holdoff mechanism similar to HOLD/RESUME_INTERRUPTS() that can be used hold off query cancel interrupts, but still allow die interrupts. The rules on which interrupts are processed when are now a bit more complicated, so refactor ProcessInterrupts() and the calls to it in signal handlers so that the signal handlers always call it if ImmediateInterruptOK is set, and ProcessInterrupts() can decide to not do anything if the other conditions are not met. Reported by Emil Lenngren. Patch reviewed by Noah Misch and Andres Freund. Backpatch to all supported versions. Security: CVE-2015-0244
2015-02-02 16:08:45 +01:00
* There is also a mechanism to prevent query cancel interrupts, while still
* allowing die interrupts: HOLD_CANCEL_INTERRUPTS() and
* RESUME_CANCEL_INTERRUPTS().
*
* Special mechanisms are used to let an interrupt be accepted when we are
* waiting for a lock or when we are waiting for command input (but, of
* course, only if the interrupt holdoff counter is zero). See the
* related code for details.
*
* A lost connection is handled similarly, although the loss of connection
* does not raise a signal, but is detected when we fail to write to the
* socket. If there was a signal for a broken connection, we could make use of
* it by setting ClientConnectionLost in the signal handler.
*
* A related, but conceptually distinct, mechanism is the "critical section"
* mechanism. A critical section not only holds off cancel/die interrupts,
2003-07-27 19:10:07 +02:00
* but causes any ereport(ERROR) or ereport(FATAL) to become ereport(PANIC)
* --- that is, a system-wide reset is forced. Needless to say, only really
2003-08-04 02:43:34 +02:00
* *critical* code should be marked as a critical section! Currently, this
2003-07-27 19:10:07 +02:00
* mechanism is only used for XLOG-related code.
*
*****************************************************************************/
/* in globals.c */
/* these are marked volatile because they are set by signal handlers: */
extern PGDLLIMPORT volatile bool InterruptPending;
extern PGDLLIMPORT volatile bool QueryCancelPending;
extern PGDLLIMPORT volatile bool ProcDiePending;
2001-03-22 05:01:46 +01:00
extern volatile bool ClientConnectionLost;
/* these are marked volatile because they are examined by signal handlers: */
extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount;
Be more careful to not lose sync in the FE/BE protocol. If any error occurred while we were in the middle of reading a protocol message from the client, we could lose sync, and incorrectly try to interpret a part of another message as a new protocol message. That will usually lead to an "invalid frontend message" error that terminates the connection. However, this is a security issue because an attacker might be able to deliberately cause an error, inject a Query message in what's supposed to be just user data, and have the server execute it. We were quite careful to not have CHECK_FOR_INTERRUPTS() calls or other operations that could ereport(ERROR) in the middle of processing a message, but a query cancel interrupt or statement timeout could nevertheless cause it to happen. Also, the V2 fastpath and COPY handling were not so careful. It's very difficult to recover in the V2 COPY protocol, so we will just terminate the connection on error. In practice, that's what happened previously anyway, as we lost protocol sync. To fix, add a new variable in pqcomm.c, PqCommReadingMsg, that is set whenever we're in the middle of reading a message. When it's set, we cannot safely ERROR out and continue running, because we might've read only part of a message. PqCommReadingMsg acts somewhat similarly to critical sections in that if an error occurs while it's set, the error handler will force the connection to be terminated, as if the error was FATAL. It's not implemented by promoting ERROR to FATAL in elog.c, like ERROR is promoted to PANIC in critical sections, because we want to be able to use PG_TRY/CATCH to recover and regain protocol sync. pq_getmessage() takes advantage of that to prevent an OOM error from terminating the connection. To prevent unnecessary connection terminations, add a holdoff mechanism similar to HOLD/RESUME_INTERRUPTS() that can be used hold off query cancel interrupts, but still allow die interrupts. The rules on which interrupts are processed when are now a bit more complicated, so refactor ProcessInterrupts() and the calls to it in signal handlers so that the signal handlers always call it if ImmediateInterruptOK is set, and ProcessInterrupts() can decide to not do anything if the other conditions are not met. Reported by Emil Lenngren. Patch reviewed by Noah Misch and Andres Freund. Backpatch to all supported versions. Security: CVE-2015-0244
2015-02-02 16:08:45 +01:00
extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount;
extern PGDLLIMPORT volatile uint32 CritSectionCount;
/* in tcop/postgres.c */
extern void ProcessInterrupts(void);
#ifndef WIN32
#define CHECK_FOR_INTERRUPTS() \
do { \
if (InterruptPending) \
ProcessInterrupts(); \
} while(0)
2004-08-29 07:07:03 +02:00
#else /* WIN32 */
#define CHECK_FOR_INTERRUPTS() \
do { \
if (UNBLOCKED_SIGNAL_QUEUE()) \
pgwin32_dispatch_queued_signals(); \
if (InterruptPending) \
ProcessInterrupts(); \
} while(0)
2004-08-29 07:07:03 +02:00
#endif /* WIN32 */
2001-03-22 05:01:46 +01:00
#define HOLD_INTERRUPTS() (InterruptHoldoffCount++)
#define RESUME_INTERRUPTS() \
do { \
Assert(InterruptHoldoffCount > 0); \
InterruptHoldoffCount--; \
} while(0)
Be more careful to not lose sync in the FE/BE protocol. If any error occurred while we were in the middle of reading a protocol message from the client, we could lose sync, and incorrectly try to interpret a part of another message as a new protocol message. That will usually lead to an "invalid frontend message" error that terminates the connection. However, this is a security issue because an attacker might be able to deliberately cause an error, inject a Query message in what's supposed to be just user data, and have the server execute it. We were quite careful to not have CHECK_FOR_INTERRUPTS() calls or other operations that could ereport(ERROR) in the middle of processing a message, but a query cancel interrupt or statement timeout could nevertheless cause it to happen. Also, the V2 fastpath and COPY handling were not so careful. It's very difficult to recover in the V2 COPY protocol, so we will just terminate the connection on error. In practice, that's what happened previously anyway, as we lost protocol sync. To fix, add a new variable in pqcomm.c, PqCommReadingMsg, that is set whenever we're in the middle of reading a message. When it's set, we cannot safely ERROR out and continue running, because we might've read only part of a message. PqCommReadingMsg acts somewhat similarly to critical sections in that if an error occurs while it's set, the error handler will force the connection to be terminated, as if the error was FATAL. It's not implemented by promoting ERROR to FATAL in elog.c, like ERROR is promoted to PANIC in critical sections, because we want to be able to use PG_TRY/CATCH to recover and regain protocol sync. pq_getmessage() takes advantage of that to prevent an OOM error from terminating the connection. To prevent unnecessary connection terminations, add a holdoff mechanism similar to HOLD/RESUME_INTERRUPTS() that can be used hold off query cancel interrupts, but still allow die interrupts. The rules on which interrupts are processed when are now a bit more complicated, so refactor ProcessInterrupts() and the calls to it in signal handlers so that the signal handlers always call it if ImmediateInterruptOK is set, and ProcessInterrupts() can decide to not do anything if the other conditions are not met. Reported by Emil Lenngren. Patch reviewed by Noah Misch and Andres Freund. Backpatch to all supported versions. Security: CVE-2015-0244
2015-02-02 16:08:45 +01:00
#define HOLD_CANCEL_INTERRUPTS() (QueryCancelHoldoffCount++)
#define RESUME_CANCEL_INTERRUPTS() \
do { \
Assert(QueryCancelHoldoffCount > 0); \
QueryCancelHoldoffCount--; \
} while(0)
2001-03-22 05:01:46 +01:00
#define START_CRIT_SECTION() (CritSectionCount++)
#define END_CRIT_SECTION() \
do { \
Assert(CritSectionCount > 0); \
CritSectionCount--; \
} while(0)
1996-10-31 08:10:14 +01:00
/*****************************************************************************
* globals.h -- *
1996-10-31 08:10:14 +01:00
*****************************************************************************/
/*
* from utils/init/globals.c
*/
extern pid_t PostmasterPid;
extern bool IsPostmasterEnvironment;
extern PGDLLIMPORT bool IsUnderPostmaster;
Background worker processes Background workers are postmaster subprocesses that run arbitrary user-specified code. They can request shared memory access as well as backend database connections; or they can just use plain libpq frontend database connections. Modules listed in shared_preload_libraries can register background workers in their _PG_init() function; this is early enough that it's not necessary to provide an extra GUC option, because the necessary extra resources can be allocated early on. Modules can install more than one bgworker, if necessary. Care is taken that these extra processes do not interfere with other postmaster tasks: only one such process is started on each ServerLoop iteration. This means a large number of them could be waiting to be started up and postmaster is still able to quickly service external connection requests. Also, shutdown sequence should not be impacted by a worker process that's reasonably well behaved (i.e. promptly responds to termination signals.) The current implementation lets worker processes specify their start time, i.e. at what point in the server startup process they are to be started: right after postmaster start (in which case they mustn't ask for shared memory access), when consistent state has been reached (useful during recovery in a HOT standby server), or when recovery has terminated (i.e. when normal backends are allowed). In case of a bgworker crash, actions to take depend on registration data: if shared memory was requested, then all other connections are taken down (as well as other bgworkers), just like it were a regular backend crashing. The bgworker itself is restarted, too, within a configurable timeframe (which can be configured to be never). More features to add to this framework can be imagined without much effort, and have been discussed, but this seems good enough as a useful unit already. An elementary sample module is supplied. Author: Álvaro Herrera This patch is loosely based on prior patches submitted by KaiGai Kohei, and unsubmitted code by Simon Riggs. Reviewed by: KaiGai Kohei, Markus Wanner, Andres Freund, Heikki Linnakangas, Simon Riggs, Amit Kapila
2012-12-06 18:57:52 +01:00
extern bool IsBackgroundWorker;
extern PGDLLIMPORT bool IsBinaryUpgrade;
extern bool ExitOnAnyError;
extern PGDLLIMPORT char *DataDir;
1996-10-31 08:10:14 +01:00
extern PGDLLIMPORT int NBuffers;
extern int MaxBackends;
extern int MaxConnections;
extern int max_worker_processes;
extern PGDLLIMPORT int MyProcPid;
extern PGDLLIMPORT pg_time_t MyStartTime;
extern PGDLLIMPORT struct Port *MyProcPort;
extern PGDLLIMPORT struct Latch *MyLatch;
extern long MyCancelKey;
extern int MyPMChildSlot;
extern char OutputFileName[];
extern PGDLLIMPORT char my_exec_path[];
extern char pkglib_path[];
2004-08-29 07:07:03 +02:00
#ifdef EXEC_BACKEND
extern char postgres_exec_path[];
#endif
1996-10-31 08:10:14 +01:00
/*
* done in storage/backendid.h for now.
*
* extern BackendId MyBackendId;
*/
extern PGDLLIMPORT Oid MyDatabaseId;
1996-10-31 08:10:14 +01:00
extern PGDLLIMPORT Oid MyDatabaseTableSpace;
/*
* Date/Time Configuration
*
* DateStyle defines the output formatting choice for date/time types:
* USE_POSTGRES_DATES specifies traditional Postgres format
* USE_ISO_DATES specifies ISO-compliant format
* USE_SQL_DATES specifies Oracle/Ingres-compliant format
* USE_GERMAN_DATES specifies German-style dd.mm/yyyy
*
* DateOrder defines the field order to be assumed when reading an
* ambiguous date (anything not in YYYY-MM-DD format, with a four-digit
* year field first, is taken to be ambiguous):
* DATEORDER_YMD specifies field order yy-mm-dd
* DATEORDER_DMY specifies field order dd-mm-yy ("European" convention)
* DATEORDER_MDY specifies field order mm-dd-yy ("US" convention)
*
* In the Postgres and SQL DateStyles, DateOrder also selects output field
* order: day comes before month in DMY style, else month comes before day.
*
* The user-visible "DateStyle" run-time parameter subsumes both of these.
*/
/* valid DateStyle values */
#define USE_POSTGRES_DATES 0
#define USE_ISO_DATES 1
#define USE_SQL_DATES 2
#define USE_GERMAN_DATES 3
#define USE_XSD_DATES 4
/* valid DateOrder values */
#define DATEORDER_YMD 0
#define DATEORDER_DMY 1
#define DATEORDER_MDY 2
extern PGDLLIMPORT int DateStyle;
extern PGDLLIMPORT int DateOrder;
/*
* IntervalStyles
* INTSTYLE_POSTGRES Like Postgres < 8.4 when DateStyle = 'iso'
* INTSTYLE_POSTGRES_VERBOSE Like Postgres < 8.4 when DateStyle != 'iso'
* INTSTYLE_SQL_STANDARD SQL standard interval literals
* INTSTYLE_ISO_8601 ISO-8601-basic formatted intervals
*/
#define INTSTYLE_POSTGRES 0
#define INTSTYLE_POSTGRES_VERBOSE 1
#define INTSTYLE_SQL_STANDARD 2
#define INTSTYLE_ISO_8601 3
extern PGDLLIMPORT int IntervalStyle;
#define MAXTZLEN 10 /* max TZ name len, not counting tr. null */
extern bool enableFsync;
1999-05-25 18:15:34 +02:00
extern bool allowSystemTableMods;
extern PGDLLIMPORT int work_mem;
extern PGDLLIMPORT int maintenance_work_mem;
2004-02-06 20:36:18 +01:00
extern int VacuumCostPageHit;
extern int VacuumCostPageMiss;
extern int VacuumCostPageDirty;
extern int VacuumCostLimit;
extern int VacuumCostDelay;
extern int VacuumPageHit;
extern int VacuumPageMiss;
extern int VacuumPageDirty;
extern int VacuumCostBalance;
2004-08-29 07:07:03 +02:00
extern bool VacuumCostActive;
/* in tcop/postgres.c */
#if defined(__ia64__) || defined(__ia64)
typedef struct
{
char *stack_base_ptr;
char *register_stack_base_ptr;
} pg_stack_base_t;
#else
typedef char *pg_stack_base_t;
#endif
extern pg_stack_base_t set_stack_base(void);
extern void restore_stack_base(pg_stack_base_t base);
extern void check_stack_depth(void);
extern bool stack_is_too_deep(void);
Allow read only connections during recovery, known as Hot Standby. Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record. New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far. This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required. Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit. Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
/* in tcop/utility.c */
extern void PreventCommandIfReadOnly(const char *cmdname);
extern void PreventCommandIfParallelMode(const char *cmdname);
extern void PreventCommandDuringRecovery(const char *cmdname);
Allow read only connections during recovery, known as Hot Standby. Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record. New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far. This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required. Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit. Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-19 02:32:45 +01:00
/* in utils/misc/guc.c */
2010-02-26 03:01:40 +01:00
extern int trace_recovery_messages;
extern int trace_recovery(int trace_level);
1996-10-31 08:10:14 +01:00
/*****************************************************************************
* pdir.h -- *
* POSTGRES directory path definitions. *
1996-10-31 08:10:14 +01:00
*****************************************************************************/
2009-12-09 22:57:51 +01:00
/* flags to be OR'd to form sec_context */
#define SECURITY_LOCAL_USERID_CHANGE 0x0001
#define SECURITY_RESTRICTED_OPERATION 0x0002
#define SECURITY_NOFORCE_RLS 0x0004
2009-12-09 22:57:51 +01:00
extern char *DatabasePath;
1996-10-31 08:10:14 +01:00
/* now in utils/init/miscinit.c */
extern void InitPostmasterChild(void);
extern void InitStandaloneProcess(const char *argv0);
extern void SetDatabasePath(const char *path);
extern char *GetUserNameFromId(Oid roleid, bool noerr);
2005-10-15 04:49:52 +02:00
extern Oid GetUserId(void);
extern Oid GetOuterUserId(void);
extern Oid GetSessionUserId(void);
extern Oid GetAuthenticatedUserId(void);
2009-12-09 22:57:51 +01:00
extern void GetUserIdAndSecContext(Oid *userid, int *sec_context);
extern void SetUserIdAndSecContext(Oid userid, int sec_context);
extern bool InLocalUserIdChange(void);
extern bool InSecurityRestrictedOperation(void);
extern bool InNoForceRLSOperation(void);
extern void GetUserIdAndContext(Oid *userid, bool *sec_def_context);
extern void SetUserIdAndContext(Oid userid, bool sec_def_context);
extern void InitializeSessionUserId(const char *rolename, Oid useroid);
extern void InitializeSessionUserIdStandalone(void);
extern void SetSessionAuthorization(Oid userid, bool is_superuser);
2005-10-15 04:49:52 +02:00
extern Oid GetCurrentRoleId(void);
extern void SetCurrentRoleId(Oid roleid, bool is_superuser);
extern void SetDataDir(const char *dir);
extern void ChangeToDataDir(void);
extern void SwitchToSharedLatch(void);
extern void SwitchBackToLocalLatch(void);
/* in utils/misc/superuser.c */
extern bool superuser(void); /* current user is superuser */
2005-10-15 04:49:52 +02:00
extern bool superuser_arg(Oid roleid); /* given user is superuser */
1996-10-31 08:10:14 +01:00
/*****************************************************************************
* pmod.h -- *
* POSTGRES processing mode definitions. *
1996-10-31 08:10:14 +01:00
*****************************************************************************/
1996-10-31 08:10:14 +01:00
/*
* Description:
* There are three processing modes in POSTGRES. They are
* BootstrapProcessing or "bootstrap," InitProcessing or
1996-10-31 08:10:14 +01:00
* "initialization," and NormalProcessing or "normal."
*
* The first two processing modes are used during special times. When the
1996-10-31 08:10:14 +01:00
* system state indicates bootstrap processing, transactions are all given
* transaction id "one" and are consequently guaranteed to commit. This mode
1996-10-31 08:10:14 +01:00
* is used during the initial generation of template databases.
*
* Initialization mode: used while starting a backend, until all normal
* initialization is complete. Some code behaves differently when executed
* in this mode to enable system bootstrapping.
*
Fix management of pendingOpsTable in auxiliary processes. mdinit() was misusing IsBootstrapProcessingMode() to decide whether to create an fsync pending-operations table in the current process. This led to creating a table not only in the startup and checkpointer processes as intended, but also in the bgwriter process, not to mention other auxiliary processes such as walwriter and walreceiver. Creation of the table in the bgwriter is fatal, because it absorbs fsync requests that should have gone to the checkpointer; instead they just sit in bgwriter local memory and are never acted on. So writes performed by the bgwriter were not being fsync'd which could result in data loss after an OS crash. I think there is no live bug with respect to walwriter and walreceiver because those never perform any writes of shared buffers; but the potential is there for future breakage in those processes too. To fix, make AuxiliaryProcessMain() export the current process's AuxProcType as a global variable, and then make mdinit() test directly for the types of aux process that should have a pendingOpsTable. Having done that, we might as well also get rid of the random bool flags such as am_walreceiver that some of the aux processes had grown. (Note that we could not have fixed the bug by examining those variables in mdinit(), because it's called from BaseInit() which is run by AuxiliaryProcessMain() before entering any of the process-type-specific code.) Back-patch to 9.2, where the problem was introduced by the split-up of bgwriter and checkpointer processes. The bogus pendingOpsTable exists in walwriter and walreceiver processes in earlier branches, but absent any evidence that it causes actual problems there, I'll leave the older branches alone.
2012-07-18 21:28:10 +02:00
* If a POSTGRES backend process is in normal mode, then all code may be
* executed normally.
1996-10-31 08:10:14 +01:00
*/
typedef enum ProcessingMode
{
BootstrapProcessing, /* bootstrap creation of template database */
InitProcessing, /* initializing system */
NormalProcessing /* normal processing */
} ProcessingMode;
1996-10-31 08:10:14 +01:00
extern ProcessingMode Mode;
#define IsBootstrapProcessingMode() (Mode == BootstrapProcessing)
Fix management of pendingOpsTable in auxiliary processes. mdinit() was misusing IsBootstrapProcessingMode() to decide whether to create an fsync pending-operations table in the current process. This led to creating a table not only in the startup and checkpointer processes as intended, but also in the bgwriter process, not to mention other auxiliary processes such as walwriter and walreceiver. Creation of the table in the bgwriter is fatal, because it absorbs fsync requests that should have gone to the checkpointer; instead they just sit in bgwriter local memory and are never acted on. So writes performed by the bgwriter were not being fsync'd which could result in data loss after an OS crash. I think there is no live bug with respect to walwriter and walreceiver because those never perform any writes of shared buffers; but the potential is there for future breakage in those processes too. To fix, make AuxiliaryProcessMain() export the current process's AuxProcType as a global variable, and then make mdinit() test directly for the types of aux process that should have a pendingOpsTable. Having done that, we might as well also get rid of the random bool flags such as am_walreceiver that some of the aux processes had grown. (Note that we could not have fixed the bug by examining those variables in mdinit(), because it's called from BaseInit() which is run by AuxiliaryProcessMain() before entering any of the process-type-specific code.) Back-patch to 9.2, where the problem was introduced by the split-up of bgwriter and checkpointer processes. The bogus pendingOpsTable exists in walwriter and walreceiver processes in earlier branches, but absent any evidence that it causes actual problems there, I'll leave the older branches alone.
2012-07-18 21:28:10 +02:00
#define IsInitProcessingMode() (Mode == InitProcessing)
#define IsNormalProcessingMode() (Mode == NormalProcessing)
#define GetProcessingMode() Mode
#define SetProcessingMode(mode) \
do { \
AssertArg((mode) == BootstrapProcessing || \
(mode) == InitProcessing || \
(mode) == NormalProcessing); \
Mode = (mode); \
} while(0)
1996-10-31 08:10:14 +01:00
Fix management of pendingOpsTable in auxiliary processes. mdinit() was misusing IsBootstrapProcessingMode() to decide whether to create an fsync pending-operations table in the current process. This led to creating a table not only in the startup and checkpointer processes as intended, but also in the bgwriter process, not to mention other auxiliary processes such as walwriter and walreceiver. Creation of the table in the bgwriter is fatal, because it absorbs fsync requests that should have gone to the checkpointer; instead they just sit in bgwriter local memory and are never acted on. So writes performed by the bgwriter were not being fsync'd which could result in data loss after an OS crash. I think there is no live bug with respect to walwriter and walreceiver because those never perform any writes of shared buffers; but the potential is there for future breakage in those processes too. To fix, make AuxiliaryProcessMain() export the current process's AuxProcType as a global variable, and then make mdinit() test directly for the types of aux process that should have a pendingOpsTable. Having done that, we might as well also get rid of the random bool flags such as am_walreceiver that some of the aux processes had grown. (Note that we could not have fixed the bug by examining those variables in mdinit(), because it's called from BaseInit() which is run by AuxiliaryProcessMain() before entering any of the process-type-specific code.) Back-patch to 9.2, where the problem was introduced by the split-up of bgwriter and checkpointer processes. The bogus pendingOpsTable exists in walwriter and walreceiver processes in earlier branches, but absent any evidence that it causes actual problems there, I'll leave the older branches alone.
2012-07-18 21:28:10 +02:00
/*
* Auxiliary-process type identifiers. These used to be in bootstrap.h
Fix management of pendingOpsTable in auxiliary processes. mdinit() was misusing IsBootstrapProcessingMode() to decide whether to create an fsync pending-operations table in the current process. This led to creating a table not only in the startup and checkpointer processes as intended, but also in the bgwriter process, not to mention other auxiliary processes such as walwriter and walreceiver. Creation of the table in the bgwriter is fatal, because it absorbs fsync requests that should have gone to the checkpointer; instead they just sit in bgwriter local memory and are never acted on. So writes performed by the bgwriter were not being fsync'd which could result in data loss after an OS crash. I think there is no live bug with respect to walwriter and walreceiver because those never perform any writes of shared buffers; but the potential is there for future breakage in those processes too. To fix, make AuxiliaryProcessMain() export the current process's AuxProcType as a global variable, and then make mdinit() test directly for the types of aux process that should have a pendingOpsTable. Having done that, we might as well also get rid of the random bool flags such as am_walreceiver that some of the aux processes had grown. (Note that we could not have fixed the bug by examining those variables in mdinit(), because it's called from BaseInit() which is run by AuxiliaryProcessMain() before entering any of the process-type-specific code.) Back-patch to 9.2, where the problem was introduced by the split-up of bgwriter and checkpointer processes. The bogus pendingOpsTable exists in walwriter and walreceiver processes in earlier branches, but absent any evidence that it causes actual problems there, I'll leave the older branches alone.
2012-07-18 21:28:10 +02:00
* but it seems saner to have them here, with the ProcessingMode stuff.
* The MyAuxProcType global is defined and set in bootstrap.c.
*/
typedef enum
{
NotAnAuxProcess = -1,
CheckerProcess = 0,
BootstrapProcess,
StartupProcess,
BgWriterProcess,
CheckpointerProcess,
WalWriterProcess,
WalReceiverProcess,
NUM_AUXPROCTYPES /* Must be last! */
} AuxProcType;
extern AuxProcType MyAuxProcType;
#define AmBootstrapProcess() (MyAuxProcType == BootstrapProcess)
#define AmStartupProcess() (MyAuxProcType == StartupProcess)
#define AmBackgroundWriterProcess() (MyAuxProcType == BgWriterProcess)
Fix management of pendingOpsTable in auxiliary processes. mdinit() was misusing IsBootstrapProcessingMode() to decide whether to create an fsync pending-operations table in the current process. This led to creating a table not only in the startup and checkpointer processes as intended, but also in the bgwriter process, not to mention other auxiliary processes such as walwriter and walreceiver. Creation of the table in the bgwriter is fatal, because it absorbs fsync requests that should have gone to the checkpointer; instead they just sit in bgwriter local memory and are never acted on. So writes performed by the bgwriter were not being fsync'd which could result in data loss after an OS crash. I think there is no live bug with respect to walwriter and walreceiver because those never perform any writes of shared buffers; but the potential is there for future breakage in those processes too. To fix, make AuxiliaryProcessMain() export the current process's AuxProcType as a global variable, and then make mdinit() test directly for the types of aux process that should have a pendingOpsTable. Having done that, we might as well also get rid of the random bool flags such as am_walreceiver that some of the aux processes had grown. (Note that we could not have fixed the bug by examining those variables in mdinit(), because it's called from BaseInit() which is run by AuxiliaryProcessMain() before entering any of the process-type-specific code.) Back-patch to 9.2, where the problem was introduced by the split-up of bgwriter and checkpointer processes. The bogus pendingOpsTable exists in walwriter and walreceiver processes in earlier branches, but absent any evidence that it causes actual problems there, I'll leave the older branches alone.
2012-07-18 21:28:10 +02:00
#define AmCheckpointerProcess() (MyAuxProcType == CheckpointerProcess)
#define AmWalWriterProcess() (MyAuxProcType == WalWriterProcess)
#define AmWalReceiverProcess() (MyAuxProcType == WalReceiverProcess)
/*****************************************************************************
* pinit.h -- *
* POSTGRES initialization and cleanup definitions. *
*****************************************************************************/
/* in utils/init/postinit.c */
extern void pg_split_opts(char **argv, int *argcp, const char *optstr);
extern void InitializeMaxBackends(void);
extern void InitPostgres(const char *in_dbname, Oid dboid, const char *username,
Oid useroid, char *out_dbname);
extern void BaseInit(void);
/* in utils/init/miscinit.c */
extern bool IgnoreSystemIndexes;
extern PGDLLIMPORT bool process_shared_preload_libraries_in_progress;
extern char *session_preload_libraries_string;
extern char *shared_preload_libraries_string;
extern char *local_preload_libraries_string;
/*
* As of 9.1, the contents of the data-directory lock file are:
*
* line #
* 1 postmaster PID (or negative of a standalone backend's PID)
* 2 data directory path
* 3 postmaster start timestamp (time_t representation)
* 4 port number
* 5 first Unix socket directory path (empty if none)
* 6 first listen_address (IP address or "*"; empty if no TCP port)
* 7 shared memory key (not present on Windows)
*
* Lines 6 and up are added via AddToDataDirLockFile() after initial file
* creation.
*
* The socket lock file, if used, has the same contents as lines 1-5.
*/
#define LOCK_FILE_LINE_PID 1
#define LOCK_FILE_LINE_DATA_DIR 2
#define LOCK_FILE_LINE_START_TIME 3
#define LOCK_FILE_LINE_PORT 4
#define LOCK_FILE_LINE_SOCKET_DIR 5
#define LOCK_FILE_LINE_LISTEN_ADDR 6
#define LOCK_FILE_LINE_SHMEM_KEY 7
extern void CreateDataDirLockFile(bool amPostmaster);
extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster,
const char *socketDir);
extern void TouchSocketLockFiles(void);
extern void AddToDataDirLockFile(int target_line, const char *str);
extern void ValidatePgVersion(const char *path);
extern void process_shared_preload_libraries(void);
extern void process_session_preload_libraries(void);
extern void pg_bindtextdomain(const char *domain);
extern bool has_rolreplication(Oid roleid);
/* in access/transam/xlog.c */
extern bool BackupInProgress(void);
extern void CancelBackup(void);
#endif /* MISCADMIN_H */