postgresql/src/backend/postmaster/postmaster.c

2688 lines
65 KiB
C

/*-------------------------------------------------------------------------
*
* postmaster.c
* This program acts as a clearing house for requests to the
* POSTGRES system. Frontend programs send a startup message
* to the Postmaster and the postmaster uses the info in the
* message to setup a backend process.
*
* The postmaster also manages system-wide operations such as
* startup, shutdown, and periodic checkpoints. The postmaster
* itself doesn't do those operations, mind you --- it just forks
* off a subprocess to do them at the right times. It also takes
* care of resetting the system if a backend crashes.
*
* The postmaster process creates the shared memory and semaphore
* pools during startup, but as a rule does not touch them itself.
* In particular, it is not a member of the PGPROC array of backends
* and so it cannot participate in lock-manager operations. Keeping
* the postmaster away from shared memory operations makes it simpler
* and more reliable. The postmaster is almost always able to recover
* from crashes of individual backends by resetting shared memory;
* if it did much with shared memory then it would be prone to crashing
* along with the backends.
*
* When a request message is received, we now fork() immediately.
* The child process performs authentication of the request, and
* then becomes a backend if successful. This allows the auth code
* to be written in a simple single-threaded style (as opposed to the
* crufty "poor man's multitasking" code that used to be needed).
* More importantly, it ensures that blockages in non-multithreaded
* libraries like SSL or PAM cannot cause denial of service to other
* clients.
*
*
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/postmaster/postmaster.c,v 1.293 2002/11/13 00:39:47 momjian Exp $
*
* NOTES
*
* Initialization:
* The Postmaster sets up a few shared memory data structures
* for the backends. It should at the very least initialize the
* lock manager.
*
* Synchronization:
* The Postmaster shares memory with the backends but should avoid
* touching shared memory, so as not to become stuck if a crashing
* backend screws up locks or shared memory. Likewise, the Postmaster
* should never block on messages from frontend clients.
*
* Garbage Collection:
* The Postmaster cleans up after backends if they have an emergency
* exit and/or core dump.
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <ctype.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <sys/param.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <limits.h>
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#include "catalog/pg_database.h"
#include "commands/async.h"
#include "lib/dllist.h"
#include "libpq/auth.h"
#include "libpq/crypt.h"
#include "libpq/libpq.h"
#include "libpq/pqcomm.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "access/xlog.h"
#include "tcop/tcopprot.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "bootstrap/bootstrap.h"
#include "pgstat.h"
#define INVALID_SOCK (-1)
#define ARGV_SIZE 64
#ifdef HAVE_SIGPROCMASK
sigset_t UnBlockSig,
BlockSig,
AuthBlockSig;
#else
int UnBlockSig,
BlockSig,
AuthBlockSig;
#endif
/*
* List of active backends (or child processes anyway; we don't actually
* know whether a given child has become a backend or is still in the
* authorization phase). This is used mainly to keep track of how many
* children we have and send them appropriate signals when necessary.
*/
typedef struct bkend
{
pid_t pid; /* process id of backend */
long cancel_key; /* cancel key for cancels for this backend */
} Backend;
static Dllist *BackendList;
/* The socket number we are listening for connections on */
int PostPortNumber;
char *UnixSocketDir;
char *VirtualHost;
/*
* MaxBackends is the limit on the number of backends we can start.
* The default is established by configure, but it can be altered at
* postmaster start with the postmaster's -N switch. Note
* that a larger MaxBackends value will increase the size of the shared
* memory area as well as cause the postmaster to grab more kernel
* semaphores, even if you never actually use that many backends.
*/
int MaxBackends = DEF_MAXBACKENDS;
/*
* ReservedBackends is the number of backends reserved for superuser use.
* This number is taken out of the pool size given by MaxBackends so
* number of backend slots available to none super users is
* (MaxBackends - ReservedBackends). Note, existing super user
* connections are not taken into account once this lower limit has
* been reached, i.e. superuser connections made before the lower limit
* is reached always count towards that limit and are not taken from
* ReservedBackends.
*/
int ReservedBackends = 2;
static char *progname = (char *) NULL;
/*
* Default Values
*/
static int ServerSock_INET = INVALID_SOCK; /* stream socket server */
#ifdef HAVE_UNIX_SOCKETS
static int ServerSock_UNIX = INVALID_SOCK; /* stream socket server */
#endif
/*
* Set by the -o option
*/
static char ExtraOptions[MAXPGPATH];
/*
* These globals control the behavior of the postmaster in case some
* backend dumps core. Normally, it kills all peers of the dead backend
* and reinitializes shared memory. By specifying -s or -n, we can have
* the postmaster stop (rather than kill) peers and not reinitialize
* shared data structures.
*/
static bool Reinit = true;
static int SendStop = false;
/* still more option variables */
bool NetServer = false; /* listen on TCP/IP */
bool EnableSSL = false;
bool SilentMode = false; /* silent mode (-S) */
int PreAuthDelay = 0;
int AuthenticationTimeout = 60;
int CheckPointTimeout = 300;
bool HostnameLookup; /* for ps display */
bool ShowPortNumber;
bool Log_connections = false;
bool Db_user_namespace = false;
/* Startup/shutdown state */
static pid_t StartupPID = 0,
ShutdownPID = 0,
CheckPointPID = 0;
static time_t checkpointed = 0;
#define NoShutdown 0
#define SmartShutdown 1
#define FastShutdown 2
static int Shutdown = NoShutdown;
static bool FatalError = false; /* T if recovering from backend crash */
bool ClientAuthInProgress = false; /* T during new-client
* authentication */
/*
* State for assigning random salts and cancel keys.
* Also, the global MyCancelKey passes the cancel key assigned to a given
* backend from the postmaster to that backend (via fork).
*/
static unsigned int random_seed = 0;
static int debug_flag = 0;
extern char *optarg;
extern int optind,
opterr;
#ifdef HAVE_INT_OPTRESET
extern int optreset;
#endif
/*
* postmaster.c - function prototypes
*/
static void pmdaemonize(int argc, char *argv[]);
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *port);
static void reset_shared(unsigned short port);
static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
static void CleanupProc(int pid, int exitstatus);
static void LogChildExit(int lev, const char *procname,
int pid, int exitstatus);
static int DoBackend(Port *port);
void ExitPostmaster(int status);
static void usage(const char *);
static int ServerLoop(void);
static int BackendStartup(Port *port);
static int ProcessStartupPacket(Port *port, bool SSLdone);
static void processCancelRequest(Port *port, void *pkt);
static int initMasks(fd_set *rmask, fd_set *wmask);
static void report_fork_failure_to_client(Port *port, int errnum);
enum CAC_state
{
CAC_OK, CAC_STARTUP, CAC_SHUTDOWN, CAC_RECOVERY, CAC_TOOMANY
};
static enum CAC_state canAcceptConnections(void);
static long PostmasterRandom(void);
static void RandomSalt(char *cryptSalt, char *md5Salt);
static void SignalChildren(int signal);
static int CountChildren(void);
static bool CreateOptsFile(int argc, char *argv[]);
static pid_t SSDataBase(int xlop);
void
postmaster_error(const char *fmt,...)
/* This lets gcc check the format string for consistency. */
__attribute__((format(printf, 1, 2)));
#define StartupDataBase() SSDataBase(BS_XLOG_STARTUP)
#define CheckPointDataBase() SSDataBase(BS_XLOG_CHECKPOINT)
#define ShutdownDataBase() SSDataBase(BS_XLOG_SHUTDOWN)
#ifdef USE_SSL
extern int secure_initialize(void);
extern void secure_destroy(void);
extern int secure_open_server(Port *);
extern void secure_close(Port *);
#endif /* USE_SSL */
static void
checkDataDir(const char *checkdir)
{
char path[MAXPGPATH];
FILE *fp;
#ifndef __CYGWIN__
struct stat stat_buf;
#endif
if (checkdir == NULL)
{
fprintf(stderr, gettext(
"%s does not know where to find the database system data.\n"
"You must specify the directory that contains the database system\n"
"either by specifying the -D invocation option or by setting the\n"
"PGDATA environment variable.\n\n"),
progname);
ExitPostmaster(2);
}
/*
* Check if the directory has group or world access. If so, reject.
*
* XXX temporarily suppress check when on Windows, because there may not
* be proper support for Unix-y file permissions. Need to think of a
* reasonable check to apply on Windows.
*/
#ifndef __CYGWIN__
if (stat(checkdir, &stat_buf) == -1)
{
if (errno == ENOENT)
elog(FATAL, "data directory %s was not found", checkdir);
else
elog(FATAL, "could not read permissions of directory %s: %m",
checkdir);
}
if (stat_buf.st_mode & (S_IRWXG | S_IRWXO))
elog(FATAL, "data directory %s has group or world access; permissions should be u=rwx (0700)",
checkdir);
#endif /* !__CYGWIN__ */
/* Look for PG_VERSION before looking for pg_control */
ValidatePgVersion(checkdir);
snprintf(path, sizeof(path), "%s/global/pg_control", checkdir);
fp = AllocateFile(path, PG_BINARY_R);
if (fp == NULL)
{
fprintf(stderr, gettext(
"%s does not find the database system.\n"
"Expected to find it in the PGDATA directory \"%s\",\n"
"but unable to open file \"%s\": %s\n\n"),
progname, checkdir, path, strerror(errno));
ExitPostmaster(2);
}
FreeFile(fp);
}
int
PostmasterMain(int argc, char *argv[])
{
int opt;
int status;
char original_extraoptions[MAXPGPATH];
char *potential_DataDir = NULL;
*original_extraoptions = '\0';
progname = argv[0];
/*
* Catch standard options before doing much else. This even works on
* systems without getopt_long.
*/
if (argc > 1)
{
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
{
usage(progname);
ExitPostmaster(0);
}
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
{
puts("postmaster (PostgreSQL) " PG_VERSION);
ExitPostmaster(0);
}
}
/*
* for security, no dir or file created can be group or other
* accessible
*/
umask((mode_t) 0077);
MyProcPid = getpid();
/*
* Fire up essential subsystems: memory management
*/
MemoryContextInit();
/*
* By default, palloc() requests in the postmaster will be allocated
* in the PostmasterContext, which is space that can be recycled by
* backends. Allocated data that needs to be available to backends
* should be allocated in TopMemoryContext.
*/
PostmasterContext = AllocSetContextCreate(TopMemoryContext,
"Postmaster",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(PostmasterContext);
IgnoreSystemIndexes(false);
/*
* Options setup
*/
InitializeGUCOptions();
potential_DataDir = getenv("PGDATA"); /* default value */
opterr = 1;
while ((opt = getopt(argc, argv, "A:a:B:b:c:D:d:Fh:ik:lm:MN:no:p:Ss-:")) != -1)
{
switch (opt)
{
case 'A':
#ifdef USE_ASSERT_CHECKING
SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
#else
postmaster_error("Assert checking is not compiled in.");
#endif
break;
case 'a':
/* Can no longer set authentication method. */
break;
case 'B':
SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'b':
/* Can no longer set the backend executable file to use. */
break;
case 'D':
potential_DataDir = optarg;
break;
case 'd':
{
/* Turn on debugging for the postmaster. */
char *debugstr = palloc(strlen("debug") + strlen(optarg) + 1);
sprintf(debugstr, "debug%s", optarg);
SetConfigOption("server_min_messages", debugstr,
PGC_POSTMASTER, PGC_S_ARGV);
pfree(debugstr);
debug_flag = atoi(optarg);
break;
}
case 'F':
SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'h':
SetConfigOption("virtual_host", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'i':
SetConfigOption("tcpip_socket", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'k':
SetConfigOption("unix_socket_directory", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
#ifdef USE_SSL
case 'l':
SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
#endif
case 'm':
/* Multiplexed backends no longer supported. */
break;
case 'M':
/*
* ignore this flag. This may be passed in because the
* program was run as 'postgres -M' instead of
* 'postmaster'
*/
break;
case 'N':
/* The max number of backends to start. */
SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'n':
/* Don't reinit shared mem after abnormal exit */
Reinit = false;
break;
case 'o':
/*
* Other options to pass to the backend on the command
* line -- useful only for debugging.
*/
strcat(ExtraOptions, " ");
strcat(ExtraOptions, optarg);
strcpy(original_extraoptions, optarg);
break;
case 'p':
SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
case 'S':
/*
* Start in 'S'ilent mode (disassociate from controlling
* tty). You may also think of this as 'S'ysV mode since
* it's most badly needed on SysV-derived systems like
* SVR4 and HP-UX.
*/
SetConfigOption("silent_mode", "true", PGC_POSTMASTER, PGC_S_ARGV);
break;
case 's':
/*
* In the event that some backend dumps core, send
* SIGSTOP, rather than SIGQUIT, to all its peers. This
* lets the wily post_hacker collect core dumps from
* everyone.
*/
SendStop = true;
break;
case 'c':
case '-':
{
char *name,
*value;
ParseLongOption(optarg, &name, &value);
if (!value)
{
if (opt == '-')
elog(ERROR, "--%s requires argument", optarg);
else
elog(ERROR, "-c %s requires argument", optarg);
}
SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
free(name);
if (value)
free(value);
break;
}
default:
fprintf(stderr, gettext("Try '%s --help' for more information.\n"), progname);
ExitPostmaster(1);
}
}
/*
* Postmaster accepts no non-option switch arguments.
*/
if (optind < argc)
{
postmaster_error("invalid argument -- %s", argv[optind]);
fprintf(stderr, gettext("Try '%s --help' for more information.\n"),
progname);
ExitPostmaster(1);
}
/*
* Check for invalid combinations of switches
*/
if (NBuffers < 2 * MaxBackends || NBuffers < 16)
{
/*
* Do not accept -B so small that backends are likely to starve
* for lack of buffers. The specific choices here are somewhat
* arbitrary.
*/
postmaster_error("The number of buffers (-B) must be at least twice the number of allowed connections (-N) and at least 16.");
ExitPostmaster(1);
}
checkDataDir(potential_DataDir); /* issues error messages */
SetDataDir(potential_DataDir);
ProcessConfigFile(PGC_POSTMASTER);
/*
* Force an exit if ReservedBackends is not less than MaxBackends.
*/
if (ReservedBackends >= MaxBackends)
elog(FATAL, "superuser_reserved_connections must be less than max_connections.");
/*
* Now that we are done processing the postmaster arguments, reset
* getopt(3) library so that it will work correctly in subprocesses.
*/
optind = 1;
#ifdef HAVE_INT_OPTRESET
optreset = 1; /* some systems need this too */
#endif
/* For debugging: display postmaster environment */
{
extern char **environ;
char **p;
elog(DEBUG2, "%s: PostmasterMain: initial environ dump:", progname);
elog(DEBUG2, "-----------------------------------------");
for (p = environ; *p; ++p)
elog(DEBUG2, "\t%s", *p);
elog(DEBUG2, "-----------------------------------------");
}
/*
* On some systems our dynloader code needs the executable's pathname.
*/
if (FindExec(pg_pathname, progname, "postgres") < 0)
elog(FATAL, "%s: could not locate executable, bailing out...",
progname);
/*
* Initialize SSL library, if specified.
*/
#ifdef USE_SSL
if (EnableSSL && !NetServer)
{
postmaster_error("For SSL, TCP/IP connections must be enabled.");
fprintf(stderr, gettext("Try '%s --help' for more information.\n"), progname);
ExitPostmaster(1);
}
if (EnableSSL)
secure_initialize();
#endif
/*
* Fork away from controlling terminal, if -S specified.
*
* Must do this before we grab any interlock files, else the interlocks
* will show the wrong PID.
*/
if (SilentMode)
pmdaemonize(argc, argv);
/*
* Create lockfile for data directory.
*
* We want to do this before we try to grab the input sockets, because
* the data directory interlock is more reliable than the socket-file
* interlock (thanks to whoever decided to put socket files in /tmp
* :-(). For the same reason, it's best to grab the TCP socket before
* the Unix socket.
*/
if (!CreateDataDirLockFile(DataDir, true))
ExitPostmaster(1);
/*
* Remove old temporary files. At this point there can be no other
* Postgres processes running in this directory, so this should be
* safe.
*/
RemovePgTempFiles();
/*
* Establish input sockets.
*/
if (NetServer)
{
status = StreamServerPort(AF_INET, VirtualHost,
(unsigned short) PostPortNumber,
UnixSocketDir,
&ServerSock_INET);
if (status != STATUS_OK)
{
postmaster_error("cannot create INET stream port");
ExitPostmaster(1);
}
}
#ifdef HAVE_UNIX_SOCKETS
status = StreamServerPort(AF_UNIX, VirtualHost,
(unsigned short) PostPortNumber,
UnixSocketDir,
&ServerSock_UNIX);
if (status != STATUS_OK)
{
postmaster_error("cannot create UNIX stream port");
ExitPostmaster(1);
}
#endif
XLOGPathInit();
/*
* Set up shared memory and semaphores.
*/
reset_shared(PostPortNumber);
/*
* Initialize the list of active backends.
*/
BackendList = DLNewList();
/*
* Record postmaster options. We delay this till now to avoid
* recording bogus options (eg, NBuffers too high for available
* memory).
*/
if (!CreateOptsFile(argc, argv))
ExitPostmaster(1);
/*
* Set up signal handlers for the postmaster process.
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, and postmaster/pgstat.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
pqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have
* children do same */
pqsignal(SIGINT, pmdie); /* send SIGTERM and ShutdownDataBase */
pqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
pqsignal(SIGTERM, pmdie); /* wait for children and ShutdownDataBase */
pqsignal(SIGALRM, SIG_IGN); /* ignored */
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
pqsignal(SIGUSR2, dummy_handler); /* unused, reserve for children */
pqsignal(SIGCHLD, reaper); /* handle child termination */
pqsignal(SIGTTIN, SIG_IGN); /* ignored */
pqsignal(SIGTTOU, SIG_IGN); /* ignored */
/*
* Reset whereToSendOutput from Debug (its starting state) to None.
* This prevents elog from sending log messages to stderr unless the
* syslog/stderr switch permits. We don't do this until the
* postmaster is fully launched, since startup failures may as well be
* reported to stderr.
*/
whereToSendOutput = None;
/*
* On many platforms, the first call of localtime() incurs significant
* overhead to load timezone info from the system configuration files.
* By doing it once in the postmaster, we avoid having to do it in
* every started child process. The savings are not huge, but they
* add up...
*/
{
time_t now = time(NULL);
(void) localtime(&now);
}
/*
* Initialize and startup the statistics collector process
*/
if (pgstat_init() < 0)
ExitPostmaster(1);
if (pgstat_start() < 0)
ExitPostmaster(1);
/*
* Load cached files for client authentication.
*/
load_hba();
load_ident();
load_user();
load_group();
/*
* We're ready to rock and roll...
*/
StartupPID = StartupDataBase();
status = ServerLoop();
/*
* ServerLoop probably shouldn't ever return, but if it does, close
* down.
*/
ExitPostmaster(status != STATUS_OK);
return 0; /* not reached */
}
static void
pmdaemonize(int argc, char *argv[])
{
int i;
pid_t pid;
#ifdef LINUX_PROFILE
struct itimerval prof_itimer;
#endif
#ifdef LINUX_PROFILE
/* see comments in BackendStartup */
getitimer(ITIMER_PROF, &prof_itimer);
#endif
pid = fork();
if (pid == (pid_t) -1)
{
postmaster_error("fork failed: %s", strerror(errno));
ExitPostmaster(1);
return; /* not reached */
}
else if (pid)
{ /* parent */
/* Parent should just exit, without doing any atexit cleanup */
_exit(0);
}
#ifdef LINUX_PROFILE
setitimer(ITIMER_PROF, &prof_itimer, NULL);
#endif
MyProcPid = getpid(); /* reset MyProcPid to child */
/* GH: If there's no setsid(), we hopefully don't need silent mode.
* Until there's a better solution.
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
{
postmaster_error("cannot disassociate from controlling TTY: %s",
strerror(errno));
ExitPostmaster(1);
}
#endif
i = open(NULL_DEV, O_RDWR | PG_BINARY);
dup2(i, 0);
dup2(i, 1);
dup2(i, 2);
close(i);
}
/*
* Print out help message
*/
static void
usage(const char *progname)
{
printf(gettext("%s is the PostgreSQL server.\n\n"), progname);
printf(gettext("Usage:\n %s [OPTION]...\n\n"), progname);
printf(gettext("Options:\n"));
#ifdef USE_ASSERT_CHECKING
printf(gettext(" -A 1|0 enable/disable run-time assert checking\n"));
#endif
printf(gettext(" -B NBUFFERS number of shared buffers (default %d)\n"), DEF_NBUFFERS);
printf(gettext(" -c NAME=VALUE set run-time parameter\n"));
printf(gettext(" -d 1-5 debugging level\n"));
printf(gettext(" -D DATADIR database directory\n"));
printf(gettext(" -F turn fsync off\n"));
printf(gettext(" -h HOSTNAME host name or IP address to listen on\n"));
printf(gettext(" -i enable TCP/IP connections\n"));
printf(gettext(" -k DIRECTORY Unix-domain socket location\n"));
#ifdef USE_SSL
printf(gettext(" -l enable SSL connections\n"));
#endif
printf(gettext(" -N MAX-CONNECT maximum number of allowed connections (default %d)\n"),
DEF_MAXBACKENDS);
printf(gettext(" -o OPTIONS pass 'OPTIONS' to each backend server\n"));
printf(gettext(" -p PORT port number to listen on (default %d)\n"), DEF_PGPORT);
printf(gettext(" -S silent mode (start in background without logging output)\n"));
printf(gettext(" --help show this help, then exit\n"));
printf(gettext(" --version output version information, then exit\n"));
printf(gettext("\nDeveloper options:\n"));
printf(gettext(" -n do not reinitialize shared memory after abnormal exit\n"));
printf(gettext(" -s send SIGSTOP to all backend servers if one dies\n"));
printf(gettext("\nPlease read the documentation for the complete list of run-time\n"
"configuration settings and how to set them on the command line or in\n"
"the configuration file.\n\n"
"Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
static int
ServerLoop(void)
{
fd_set readmask,
writemask;
int nSockets;
struct timeval now,
later;
struct timezone tz;
gettimeofday(&now, &tz);
nSockets = initMasks(&readmask, &writemask);
for (;;)
{
Port *port;
fd_set rmask,
wmask;
struct timeval timeout;
/*
* The timeout for the select() below is normally set on the basis
* of the time to the next checkpoint. However, if for some
* reason we don't have a next-checkpoint time, time out after 60
* seconds. This keeps checkpoint scheduling from locking up when
* we get new connection requests infrequently (since we are
* likely to detect checkpoint completion just after enabling
* signals below, after we've already made the decision about how
* long to wait this time).
*/
timeout.tv_sec = 60;
timeout.tv_usec = 0;
if (CheckPointPID == 0 && checkpointed &&
Shutdown == NoShutdown && !FatalError && random_seed != 0)
{
time_t now = time(NULL);
if (CheckPointTimeout + checkpointed > now)
{
/*
* Not time for checkpoint yet, so set select timeout
*/
timeout.tv_sec = CheckPointTimeout + checkpointed - now;
}
else
{
/* Time to make the checkpoint... */
CheckPointPID = CheckPointDataBase();
/*
* if fork failed, schedule another try at 0.1 normal
* delay
*/
if (CheckPointPID == 0)
{
timeout.tv_sec = CheckPointTimeout / 10;
checkpointed = now + timeout.tv_sec - CheckPointTimeout;
}
}
}
/*
* Wait for something to happen.
*/
memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
memcpy((char *) &wmask, (char *) &writemask, sizeof(fd_set));
PG_SETMASK(&UnBlockSig);
if (select(nSockets, &rmask, &wmask, (fd_set *) NULL, &timeout) < 0)
{
PG_SETMASK(&BlockSig);
if (errno == EINTR || errno == EWOULDBLOCK)
continue;
elog(LOG, "ServerLoop: select failed: %m");
return STATUS_ERROR;
}
/*
* Block all signals until we wait again. (This makes it safe for
* our signal handlers to do nontrivial work.)
*/
PG_SETMASK(&BlockSig);
/*
* Select a random seed at the time of first receiving a request.
*/
while (random_seed == 0)
{
gettimeofday(&later, &tz);
/*
* We are not sure how much precision is in tv_usec, so we
* swap the nibbles of 'later' and XOR them with 'now'. On the
* off chance that the result is 0, we loop until it isn't.
*/
random_seed = now.tv_usec ^
((later.tv_usec << 16) |
((later.tv_usec >> 16) & 0xffff));
}
/*
* New connection pending on our well-known port's socket? If so,
* fork a child process to deal with it.
*/
#ifdef HAVE_UNIX_SOCKETS
if (ServerSock_UNIX != INVALID_SOCK
&& FD_ISSET(ServerSock_UNIX, &rmask))
{
port = ConnCreate(ServerSock_UNIX);
if (port)
{
BackendStartup(port);
/*
* We no longer need the open socket or port structure in
* this process
*/
StreamClose(port->sock);
ConnFree(port);
}
}
#endif
if (ServerSock_INET != INVALID_SOCK
&& FD_ISSET(ServerSock_INET, &rmask))
{
port = ConnCreate(ServerSock_INET);
if (port)
{
BackendStartup(port);
/*
* We no longer need the open socket or port structure in
* this process
*/
StreamClose(port->sock);
ConnFree(port);
}
}
}
}
/*
* Initialise the read and write masks for select() for the well-known ports
* we are listening on. Return the number of sockets to listen on.
*/
static int
initMasks(fd_set *rmask, fd_set *wmask)
{
int nsocks = -1;
FD_ZERO(rmask);
FD_ZERO(wmask);
#ifdef HAVE_UNIX_SOCKETS
if (ServerSock_UNIX != INVALID_SOCK)
{
FD_SET(ServerSock_UNIX, rmask);
if (ServerSock_UNIX > nsocks)
nsocks = ServerSock_UNIX;
}
#endif
if (ServerSock_INET != INVALID_SOCK)
{
FD_SET(ServerSock_INET, rmask);
if (ServerSock_INET > nsocks)
nsocks = ServerSock_INET;
}
return nsocks + 1;
}
/*
* Read the startup packet and do something according to it.
*
* Returns STATUS_OK or STATUS_ERROR, or might call elog(FATAL) and
* not return at all.
*
* (Note that elog(FATAL) stuff is sent to the client, so only use it
* if that's what you want. Return STATUS_ERROR if you don't want to
* send anything to the client, which would typically be appropriate
* if we detect a communications failure.)
*/
static int
ProcessStartupPacket(Port *port, bool SSLdone)
{
StartupPacket *packet;
enum CAC_state cac;
int32 len;
void *buf;
if (pq_getbytes((char *) &len, 4) == EOF)
{
elog(LOG, "incomplete startup packet");
return STATUS_ERROR;
}
len = ntohl(len);
len -= 4;
if (len < sizeof(ProtocolVersion) || len > sizeof(StartupPacket))
elog(FATAL, "invalid length of startup packet");
/* Ensure we see zeroes for any bytes not sent */
buf = palloc0(sizeof(StartupPacket));
if (pq_getbytes(buf, len) == EOF)
{
elog(LOG, "incomplete startup packet");
return STATUS_ERROR;
}
packet = buf;
/*
* The first field is either a protocol version number or a special
* request code.
*/
port->proto = ntohl(packet->protoVersion);
if (port->proto == CANCEL_REQUEST_CODE)
{
processCancelRequest(port, packet);
return 127; /* XXX */
}
if (port->proto == NEGOTIATE_SSL_CODE && !SSLdone)
{
char SSLok;
#ifdef USE_SSL
/* No SSL when disabled or on Unix sockets */
if (!EnableSSL || port->laddr.sa.sa_family != AF_INET)
SSLok = 'N';
else
SSLok = 'S'; /* Support for SSL */
#else
SSLok = 'N'; /* No support for SSL */
#endif
if (send(port->sock, &SSLok, 1, 0) != 1)
{
elog(LOG, "failed to send SSL negotiation response: %m");
return STATUS_ERROR; /* close the connection */
}
#ifdef USE_SSL
if (SSLok == 'S' && secure_open_server(port) == -1)
return STATUS_ERROR;
#endif
/* regular startup packet, cancel, etc packet should follow... */
/* but not another SSL negotiation request */
return ProcessStartupPacket(port, true);
}
/* Could add additional special packet types here */
/* Check we can handle the protocol the frontend is using. */
if (PG_PROTOCOL_MAJOR(port->proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
PG_PROTOCOL_MAJOR(port->proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) ||
(PG_PROTOCOL_MAJOR(port->proto) == PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST) &&
PG_PROTOCOL_MINOR(port->proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))
elog(FATAL, "unsupported frontend protocol");
/*
* Get the parameters from the startup packet as C strings. The
* packet destination was cleared first so a short packet has zeros
* silently added.
*/
StrNCpy(port->database, packet->database, sizeof(port->database));
StrNCpy(port->user, packet->user, sizeof(port->user));
StrNCpy(port->options, packet->options, sizeof(port->options));
StrNCpy(port->tty, packet->tty, sizeof(port->tty));
/* The database defaults to the user name. */
if (port->database[0] == '\0')
StrNCpy(port->database, packet->user, sizeof(port->database));
/*
* Truncate given database and user names to length of a Postgres
* name. This avoids lookup failures when overlength names are given.
*/
if ((int) sizeof(port->database) >= NAMEDATALEN)
port->database[NAMEDATALEN - 1] = '\0';
if ((int) sizeof(port->user) >= NAMEDATALEN)
port->user[NAMEDATALEN - 1] = '\0';
/* Check a user name was given. */
if (port->user[0] == '\0')
elog(FATAL, "no PostgreSQL user name specified in startup packet");
if (Db_user_namespace)
{
/*
* If user@, it is a global user, remove '@'. We only want to do
* this if there is an '@' at the end and no earlier in the user
* string or they may fake as a local user of another database
* attaching to this database.
*/
if (strchr(port->user, '@') == port->user + strlen(port->user) - 1)
*strchr(port->user, '@') = '\0';
else
{
/* Append '@' and dbname */
char hold_user[SM_DATABASE_USER + 1];
snprintf(hold_user, SM_DATABASE_USER + 1, "%s@%s", port->user,
port->database);
strcpy(port->user, hold_user);
}
}
/*
* If we're going to reject the connection due to database state, say
* so now instead of wasting cycles on an authentication exchange.
* (This also allows a pg_ping utility to be written.)
*/
cac = canAcceptConnections();
switch (cac)
{
case CAC_STARTUP:
elog(FATAL, "The database system is starting up");
break;
case CAC_SHUTDOWN:
elog(FATAL, "The database system is shutting down");
break;
case CAC_RECOVERY:
elog(FATAL, "The database system is in recovery mode");
break;
case CAC_TOOMANY:
elog(FATAL, "Sorry, too many clients already");
break;
case CAC_OK:
default:
break;
}
return STATUS_OK;
}
/*
* The client has sent a cancel request packet, not a normal
* start-a-new-connection packet. Perform the necessary processing.
* Nothing is sent back to the client.
*/
static void
processCancelRequest(Port *port, void *pkt)
{
CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
int backendPID;
long cancelAuthCode;
Dlelem *curr;
Backend *bp;
backendPID = (int) ntohl(canc->backendPID);
cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
if (backendPID == CheckPointPID)
{
elog(DEBUG1, "processCancelRequest: CheckPointPID in cancel request for process %d", backendPID);
return;
}
/* See if we have a matching backend */
for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
bp = (Backend *) DLE_VAL(curr);
if (bp->pid == backendPID)
{
if (bp->cancel_key == cancelAuthCode)
{
/* Found a match; signal that backend to cancel current op */
elog(DEBUG1, "processing cancel request: sending SIGINT to process %d",
backendPID);
kill(bp->pid, SIGINT);
}
else
/* Right PID, wrong key: no way, Jose */
elog(DEBUG1, "bad key in cancel request for process %d",
backendPID);
return;
}
}
/* No matching backend */
elog(DEBUG1, "bad pid in cancel request for process %d", backendPID);
}
/*
* canAcceptConnections --- check to see if database state allows connections.
*/
static enum CAC_state
canAcceptConnections(void)
{
/* Can't start backends when in startup/shutdown/recovery state. */
if (Shutdown > NoShutdown)
return CAC_SHUTDOWN;
if (StartupPID)
return CAC_STARTUP;
if (FatalError)
return CAC_RECOVERY;
/*
* Don't start too many children.
*
* We allow more connections than we can have backends here because some
* might still be authenticating; they might fail auth, or some
* existing backend might exit before the auth cycle is completed. The
* exact MaxBackends limit is enforced when a new backend tries to
* join the shared-inval backend array.
*/
if (CountChildren() >= 2 * MaxBackends)
return CAC_TOOMANY;
return CAC_OK;
}
/*
* ConnCreate -- create a local connection data structure
*/
static Port *
ConnCreate(int serverFd)
{
Port *port;
if (!(port = (Port *) calloc(1, sizeof(Port))))
{
elog(LOG, "ConnCreate: malloc failed");
SignalChildren(SIGQUIT);
ExitPostmaster(1);
}
if (StreamConnection(serverFd, port) != STATUS_OK)
{
StreamClose(port->sock);
ConnFree(port);
port = NULL;
}
else
{
/*
* Precompute password salt values to use for this connection.
* It's slightly annoying to do this long in advance of knowing
* whether we'll need 'em or not, but we must do the random()
* calls before we fork, not after. Else the postmaster's random
* sequence won't get advanced, and all backends would end up
* using the same salt...
*/
RandomSalt(port->cryptSalt, port->md5Salt);
}
return port;
}
/*
* ConnFree -- free a local connection data structure
*/
static void
ConnFree(Port *conn)
{
#ifdef USE_SSL
secure_close(conn);
#endif
free(conn);
}
/*
* ClosePostmasterPorts -- close all the postmaster's open sockets
*
* This is called during child process startup to release file descriptors
* that are not needed by that child process. The postmaster still has
* them open, of course.
*/
void
ClosePostmasterPorts(bool pgstat_too)
{
/* Close the listen sockets */
if (NetServer)
StreamClose(ServerSock_INET);
ServerSock_INET = INVALID_SOCK;
#ifdef HAVE_UNIX_SOCKETS
StreamClose(ServerSock_UNIX);
ServerSock_UNIX = INVALID_SOCK;
#endif
/* Close pgstat control sockets, unless we're starting pgstat itself */
if (pgstat_too)
pgstat_close_sockets();
}
/*
* reset_shared -- reset shared memory and semaphores
*/
static void
reset_shared(unsigned short port)
{
/*
* Create or re-create shared memory and semaphores.
*
* Note: in each "cycle of life" we will normally assign the same IPC
* keys (if using SysV shmem and/or semas), since the port number is
* used to determine IPC keys. This helps ensure that we will clean
* up dead IPC objects if the postmaster crashes and is restarted.
*/
CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
}
/*
* SIGHUP -- reread config files, and tell children to do same
*/
static void
SIGHUP_handler(SIGNAL_ARGS)
{
int save_errno = errno;
PG_SETMASK(&BlockSig);
if (Shutdown <= SmartShutdown)
{
elog(LOG, "Received SIGHUP, reloading configuration files");
SignalChildren(SIGHUP);
ProcessConfigFile(PGC_SIGHUP);
load_hba();
load_ident();
}
PG_SETMASK(&UnBlockSig);
errno = save_errno;
}
/*
* pmdie -- signal handler for processing various postmaster signals.
*/
static void
pmdie(SIGNAL_ARGS)
{
int save_errno = errno;
PG_SETMASK(&BlockSig);
elog(DEBUG1, "pmdie %d", postgres_signal_arg);
switch (postgres_signal_arg)
{
case SIGTERM:
/*
* Smart Shutdown:
*
* Wait for children to end their work and ShutdownDataBase.
*/
if (Shutdown >= SmartShutdown)
break;
Shutdown = SmartShutdown;
elog(LOG, "smart shutdown request");
if (DLGetHead(BackendList)) /* let reaper() handle this */
break;
/*
* No children left. Shutdown data base system.
*/
if (StartupPID > 0 || FatalError) /* let reaper() handle
* this */
break;
if (ShutdownPID > 0)
{
elog(PANIC, "shutdown process %d already running",
(int) ShutdownPID);
abort();
}
ShutdownPID = ShutdownDataBase();
break;
case SIGINT:
/*
* Fast Shutdown:
*
* abort all children with SIGTERM (rollback active transactions
* and exit) and ShutdownDataBase when they are gone.
*/
if (Shutdown >= FastShutdown)
break;
elog(LOG, "fast shutdown request");
if (DLGetHead(BackendList)) /* let reaper() handle this */
{
Shutdown = FastShutdown;
if (!FatalError)
{
elog(LOG, "aborting any active transactions");
SignalChildren(SIGTERM);
}
break;
}
if (Shutdown > NoShutdown)
{
Shutdown = FastShutdown;
break;
}
Shutdown = FastShutdown;
/*
* No children left. Shutdown data base system.
*/
if (StartupPID > 0 || FatalError) /* let reaper() handle
* this */
break;
if (ShutdownPID > 0)
{
elog(PANIC, "shutdown process %d already running",
(int) ShutdownPID);
abort();
}
ShutdownPID = ShutdownDataBase();
break;
case SIGQUIT:
/*
* Immediate Shutdown:
*
* abort all children with SIGQUIT and exit without attempt to
* properly shutdown data base system.
*/
elog(LOG, "immediate shutdown request");
if (ShutdownPID > 0)
kill(ShutdownPID, SIGQUIT);
if (StartupPID > 0)
kill(StartupPID, SIGQUIT);
if (DLGetHead(BackendList))
SignalChildren(SIGQUIT);
ExitPostmaster(0);
break;
}
PG_SETMASK(&UnBlockSig);
errno = save_errno;
}
/*
* Reaper -- signal handler to cleanup after a backend (child) dies.
*/
static void
reaper(SIGNAL_ARGS)
{
int save_errno = errno;
#ifdef HAVE_WAITPID
int status; /* backend exit status */
#else
union wait status; /* backend exit status */
#endif
int exitstatus;
int pid; /* process id of dead backend */
PG_SETMASK(&BlockSig);
elog(DEBUG3, "reaping dead processes");
#ifdef HAVE_WAITPID
while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
{
exitstatus = status;
#else
while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
{
exitstatus = status.w_status;
#endif
/*
* Check if this child was the statistics collector. If so, start
* a new one.
*/
if (pgstat_ispgstat(pid))
{
LogChildExit(LOG, gettext("statistics collector process"),
pid, exitstatus);
pgstat_start();
continue;
}
/*
* Check if this child was a shutdown or startup process.
*/
if (ShutdownPID > 0 && pid == ShutdownPID)
{
if (exitstatus != 0)
{
LogChildExit(LOG, gettext("shutdown process"),
pid, exitstatus);
ExitPostmaster(1);
}
ExitPostmaster(0);
}
if (StartupPID > 0 && pid == StartupPID)
{
if (exitstatus != 0)
{
LogChildExit(LOG, gettext("startup process"),
pid, exitstatus);
elog(LOG, "aborting startup due to startup process failure");
ExitPostmaster(1);
}
StartupPID = 0;
FatalError = false; /* done with recovery */
if (Shutdown > NoShutdown)
{
if (ShutdownPID > 0)
{
elog(PANIC, "startup process %d died while shutdown process %d already running",
pid, (int) ShutdownPID);
abort();
}
ShutdownPID = ShutdownDataBase();
}
/*
* Startup succeeded - remember its ID and RedoRecPtr
*/
SetThisStartUpID();
/*
* Arrange for first checkpoint to occur after standard delay.
*/
CheckPointPID = 0;
checkpointed = time(NULL);
goto reaper_done;
}
CleanupProc(pid, exitstatus);
}
if (FatalError)
{
/*
* Wait for all children exit, then reset shmem and
* StartupDataBase.
*/
if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0)
goto reaper_done;
elog(LOG, "all server processes terminated; reinitializing shared memory and semaphores");
shmem_exit(0);
reset_shared(PostPortNumber);
StartupPID = StartupDataBase();
goto reaper_done;
}
if (Shutdown > NoShutdown)
{
if (DLGetHead(BackendList))
goto reaper_done;
if (StartupPID > 0 || ShutdownPID > 0)
goto reaper_done;
ShutdownPID = ShutdownDataBase();
}
reaper_done:
PG_SETMASK(&UnBlockSig);
errno = save_errno;
}
/*
* CleanupProc -- cleanup after terminated backend.
*
* Remove all local state associated with backend.
*/
static void
CleanupProc(int pid,
int exitstatus) /* child's exit status. */
{
Dlelem *curr,
*next;
Backend *bp;
LogChildExit(DEBUG1, gettext("child process"), pid, exitstatus);
/*
* If a backend dies in an ugly way (i.e. exit status not 0) then we
* must signal all other backends to quickdie. If exit status is zero
* we assume everything is hunky dory and simply remove the backend
* from the active backend list.
*/
if (exitstatus == 0)
{
curr = DLGetHead(BackendList);
while (curr)
{
bp = (Backend *) DLE_VAL(curr);
if (bp->pid == pid)
{
DLRemove(curr);
free(bp);
DLFreeElem(curr);
break;
}
curr = DLGetSucc(curr);
}
if (pid == CheckPointPID)
{
CheckPointPID = 0;
if (!FatalError)
{
checkpointed = time(NULL);
/* Update RedoRecPtr for future child backends */
GetSavedRedoRecPtr();
}
}
else
pgstat_beterm(pid);
return;
}
/* below here we're dealing with a non-normal exit */
/* Make log entry unless we did so already */
if (!FatalError)
{
LogChildExit(LOG, gettext("server process"), pid, exitstatus);
elog(LOG, "terminating any other active server processes");
}
curr = DLGetHead(BackendList);
while (curr)
{
next = DLGetSucc(curr);
bp = (Backend *) DLE_VAL(curr);
if (bp->pid != pid)
{
/*
* This backend is still alive. Unless we did so already,
* tell it to commit hara-kiri.
*
* SIGQUIT is the special signal that says exit without proc_exit
* and let the user know what's going on. But if SendStop is
* set (-s on command line), then we send SIGSTOP instead, so
* that we can get core dumps from all backends by hand.
*/
if (!FatalError)
{
elog(DEBUG1, "CleanupProc: sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"), (int) bp->pid);
kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
}
}
else
{
/*
* Found entry for freshly-dead backend, so remove it.
*/
DLRemove(curr);
free(bp);
DLFreeElem(curr);
}
curr = next;
}
if (pid == CheckPointPID)
{
CheckPointPID = 0;
checkpointed = 0;
}
else
{
/*
* Tell the collector about backend termination
*/
pgstat_beterm(pid);
}
FatalError = true;
}
/*
* Log the death of a child process.
*/
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
/*
* translator: the first %s in these messages is a noun phrase
* describing a child process, such as "server process"
*/
if (WIFEXITED(exitstatus))
elog(lev, "%s (pid %d) exited with exit code %d",
procname, pid, WEXITSTATUS(exitstatus));
else if (WIFSIGNALED(exitstatus))
elog(lev, "%s (pid %d) was terminated by signal %d",
procname, pid, WTERMSIG(exitstatus));
else
elog(lev, "%s (pid %d) exited with unexpected status %d",
procname, pid, exitstatus);
}
/*
* Send a signal to all backend children.
*/
static void
SignalChildren(int signal)
{
Dlelem *curr,
*next;
Backend *bp;
curr = DLGetHead(BackendList);
while (curr)
{
next = DLGetSucc(curr);
bp = (Backend *) DLE_VAL(curr);
if (bp->pid != MyProcPid)
{
elog(DEBUG1, "SignalChildren: sending signal %d to process %d",
signal, (int) bp->pid);
kill(bp->pid, signal);
}
curr = next;
}
}
/*
* BackendStartup -- start backend process
*
* returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
*/
static int
BackendStartup(Port *port)
{
Backend *bn; /* for backend cleanup */
pid_t pid;
#ifdef LINUX_PROFILE
struct itimerval prof_itimer;
#endif
/*
* Compute the cancel key that will be assigned to this backend. The
* backend will have its own copy in the forked-off process' value of
* MyCancelKey, so that it can transmit the key to the frontend.
*/
MyCancelKey = PostmasterRandom();
/*
* Make room for backend data structure. Better before the fork() so
* we can handle failure cleanly.
*/
bn = (Backend *) malloc(sizeof(Backend));
if (!bn)
{
elog(LOG, "out of memory; connection startup aborted");
return STATUS_ERROR;
}
/*
* Flush stdio channels just before fork, to avoid double-output
* problems. Ideally we'd use fflush(NULL) here, but there are still a
* few non-ANSI stdio libraries out there (like SunOS 4.1.x) that
* coredump if we do. Presently stdout and stderr are the only stdio
* output channels used by the postmaster, so fflush'ing them should
* be sufficient.
*/
fflush(stdout);
fflush(stderr);
#ifdef LINUX_PROFILE
/*
* Linux's fork() resets the profiling timer in the child process. If
* we want to profile child processes then we need to save and restore
* the timer setting. This is a waste of time if not profiling,
* however, so only do it if commanded by specific -DLINUX_PROFILE
* switch.
*/
getitimer(ITIMER_PROF, &prof_itimer);
#endif
#ifdef __BEOS__
/* Specific beos actions before backend startup */
beos_before_backend_startup();
#endif
pid = fork();
if (pid == 0) /* child */
{
int status;
#ifdef LINUX_PROFILE
setitimer(ITIMER_PROF, &prof_itimer, NULL);
#endif
#ifdef __BEOS__
/* Specific beos backend startup actions */
beos_backend_startup();
#endif
free(bn);
status = DoBackend(port);
if (status != 0)
{
elog(LOG, "connection startup failed");
proc_exit(status);
}
else
proc_exit(0);
}
/* in parent, error */
if (pid < 0)
{
int save_errno = errno;
#ifdef __BEOS__
/* Specific beos backend startup actions */
beos_backend_startup_failed();
#endif
free(bn);
elog(LOG, "connection startup failed (fork failure): %s",
strerror(save_errno));
report_fork_failure_to_client(port, save_errno);
return STATUS_ERROR;
}
/* in parent, normal */
elog(DEBUG1, "BackendStartup: forked pid=%d socket=%d", (int) pid,
port->sock);
/*
* Everything's been successful, it's safe to add this backend to our
* list of backends.
*/
bn->pid = pid;
bn->cancel_key = MyCancelKey;
DLAddHead(BackendList, DLNewElem(bn));
return STATUS_OK;
}
/*
* Try to report backend fork() failure to client before we close the
* connection. Since we do not care to risk blocking the postmaster on
* this connection, we set the connection to non-blocking and try only once.
*
* This is grungy special-purpose code; we cannot use backend libpq since
* it's not up and running.
*/
static void
report_fork_failure_to_client(Port *port, int errnum)
{
char buffer[1000];
#ifdef __BEOS__
int on = 1;
#endif
/* Format the error message packet */
snprintf(buffer, sizeof(buffer), "E%s%s\n",
gettext("Server process fork() failed: "),
strerror(errnum));
/* Set port to non-blocking. Don't do send() if this fails */
#ifdef __BEOS__
if (ioctl(port->sock, FIONBIO, &on) != 0)
return;
#else
if (fcntl(port->sock, F_SETFL, O_NONBLOCK) < 0)
return;
#endif
send(port->sock, buffer, strlen(buffer) + 1, 0);
}
/*
* split_opts -- split a string of options and append it to an argv array
*
* NB: the string is destructively modified!
*
* Since no current POSTGRES arguments require any quoting characters,
* we can use the simple-minded tactic of assuming each set of space-
* delimited characters is a separate argv element.
*
* If you don't like that, well, we *used* to pass the whole option string
* as ONE argument to execl(), which was even less intelligent...
*/
static void
split_opts(char **argv, int *argcp, char *s)
{
while (s && *s)
{
while (isspace((unsigned char) *s))
++s;
if (*s == '\0')
break;
argv[(*argcp)++] = s;
while (*s && !isspace((unsigned char) *s))
++s;
if (*s)
*s++ = '\0';
}
}
/*
* DoBackend -- perform authentication, and if successful, set up the
* backend's argument list and invoke backend main().
*
* This used to perform an execv() but we no longer exec the backend;
* it's the same executable as the postmaster.
*
* returns:
* Shouldn't return at all.
* If PostgresMain() fails, return status.
*/
static int
DoBackend(Port *port)
{
char *remote_host;
char *av[ARGV_SIZE * 2];
int ac = 0;
char debugbuf[ARGV_SIZE];
char protobuf[ARGV_SIZE];
char dbbuf[ARGV_SIZE];
char optbuf[ARGV_SIZE];
char ttybuf[ARGV_SIZE];
int i;
int status;
struct timeval now;
struct timezone tz;
/*
* Let's clean up ourselves as the postmaster child
*/
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
ClientAuthInProgress = true; /* limit visibility of log messages */
/* We don't want the postmaster's proc_exit() handlers */
on_exit_reset();
/*
* Signal handlers setting is moved to tcop/postgres...
*/
/* Close the postmaster's other sockets */
ClosePostmasterPorts(true);
/* Save port etc. for ps status */
MyProcPort = port;
/* Reset MyProcPid to new backend's pid */
MyProcPid = getpid();
/*
* Initialize libpq and enable reporting of elog errors to the client.
* Must do this now because authentication uses libpq to send
* messages.
*/
pq_init(); /* initialize libpq to talk to client */
whereToSendOutput = Remote; /* now safe to elog to client */
/*
* We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT
* during any client authentication related communication. Otherwise
* the postmaster cannot shutdown the database FAST or IMMED cleanly
* if a buggy client blocks a backend during authentication.
*/
pqsignal(SIGTERM, authdie);
pqsignal(SIGQUIT, authdie);
pqsignal(SIGALRM, authdie);
PG_SETMASK(&AuthBlockSig);
/*
* Get the remote host name and port for logging and status display.
*/
if (port->raddr.sa.sa_family == AF_INET)
{
unsigned short remote_port;
char *host_addr;
remote_port = ntohs(port->raddr.in.sin_port);
host_addr = inet_ntoa(port->raddr.in.sin_addr);
remote_host = NULL;
if (HostnameLookup)
{
struct hostent *host_ent;
host_ent = gethostbyaddr((char *) &port->raddr.in.sin_addr,
sizeof(port->raddr.in.sin_addr),
AF_INET);
if (host_ent)
{
remote_host = palloc(strlen(host_addr) + strlen(host_ent->h_name) + 3);
sprintf(remote_host, "%s[%s]", host_ent->h_name, host_addr);
}
}
if (remote_host == NULL)
remote_host = pstrdup(host_addr);
if (Log_connections)
elog(LOG, "connection received: host=%s port=%hu",
remote_host, remote_port);
if (ShowPortNumber)
{
/* modify remote_host for use in ps status */
int slen = strlen(remote_host) + 10;
char *str = palloc(slen);
snprintf(str, slen, "%s:%hu", remote_host, remote_port);
pfree(remote_host);
remote_host = str;
}
}
else
{
/* not AF_INET */
remote_host = "[local]";
if (Log_connections)
elog(LOG, "connection received: host=%s",
remote_host);
}
/*
* PreAuthDelay is a debugging aid for investigating problems in the
* authentication cycle: it can be set in postgresql.conf to allow
* time to attach to the newly-forked backend with a debugger. (See
* also the -W backend switch, which we allow clients to pass through
* PGOPTIONS, but it is not honored until after authentication.)
*/
if (PreAuthDelay > 0)
sleep(PreAuthDelay);
/*
* Ready to begin client interaction. We will give up and exit(0)
* after a time delay, so that a broken client can't hog a connection
* indefinitely. PreAuthDelay doesn't count against the time limit.
*/
if (!enable_sig_alarm(AuthenticationTimeout * 1000, false))
elog(FATAL, "DoBackend: Unable to set timer for auth timeout");
/*
* Receive the startup packet (which might turn out to be a cancel
* request packet).
*/
status = ProcessStartupPacket(port, false);
if (status != STATUS_OK)
return 0; /* cancel request processed, or error */
/*
* Now that we have the user and database name, we can set the process
* title for ps. It's good to do this as early as possible in
* startup.
*/
init_ps_display(port->user, port->database, remote_host);
set_ps_display("authentication");
/*
* Now perform authentication exchange.
*/
ClientAuthentication(port); /* might not return, if failure */
/*
* Done with authentication. Disable timeout, and prevent
* SIGTERM/SIGQUIT again until backend startup is complete.
*/
if (!disable_sig_alarm(false))
elog(FATAL, "DoBackend: Unable to disable timer for auth timeout");
PG_SETMASK(&BlockSig);
if (Log_connections)
elog(LOG, "connection authorized: user=%s database=%s",
port->user, port->database);
/*
* Don't want backend to be able to see the postmaster random number
* generator state. We have to clobber the static random_seed *and*
* start a new random sequence in the random() library function.
*/
random_seed = 0;
gettimeofday(&now, &tz);
srandom((unsigned int) now.tv_usec);
/* ----------------
* Now, build the argv vector that will be given to PostgresMain.
*
* The layout of the command line is
* postgres [secure switches] -p databasename [insecure switches]
* where the switches after -p come from the client request.
* ----------------
*/
av[ac++] = "postgres";
/*
* Pass the requested debugging level along to the backend.
*/
if (debug_flag > 0)
{
sprintf(debugbuf, "-d%d", debug_flag);
av[ac++] = debugbuf;
}
/*
* Pass any backend switches specified with -o in the postmaster's own
* command line. We assume these are secure. (It's OK to mangle
* ExtraOptions since we are now in the child process; this won't
* change the postmaster's copy.)
*/
split_opts(av, &ac, ExtraOptions);
/* Tell the backend what protocol the frontend is using. */
sprintf(protobuf, "-v%u", port->proto);
av[ac++] = protobuf;
/*
* Tell the backend it is being called from the postmaster, and which
* database to use. -p marks the end of secure switches.
*/
av[ac++] = "-p";
StrNCpy(dbbuf, port->database, ARGV_SIZE);
av[ac++] = dbbuf;
/*
* Pass the (insecure) option switches from the connection request.
*/
StrNCpy(optbuf, port->options, ARGV_SIZE);
split_opts(av, &ac, optbuf);
/*
* Pass the (insecure) debug output file request.
*
* NOTE: currently, this is useless code, since the backend will not
* honor an insecure -o switch. I left it here since the backend
* could be modified to allow insecure -o, given adequate checking
* that the specified filename is something safe to write on.
*/
if (port->tty[0])
{
StrNCpy(ttybuf, port->tty, ARGV_SIZE);
av[ac++] = "-o";
av[ac++] = ttybuf;
}
av[ac] = (char *) NULL;
/*
* Release postmaster's working memory context so that backend can
* recycle the space. Note this does not trash *MyProcPort, because
* ConnCreate() allocated that space with malloc() ... else we'd need
* to copy the Port data here.
*/
MemoryContextSwitchTo(TopMemoryContext);
MemoryContextDelete(PostmasterContext);
PostmasterContext = NULL;
/*
* Debug: print arguments being passed to backend
*/
elog(DEBUG2, "%s child[%d]: starting with (", progname, MyProcPid);
for (i = 0; i < ac; ++i)
elog(DEBUG2, "\t%s", av[i]);
elog(DEBUG2, ")");
ClientAuthInProgress = false; /* client_min_messages is active
* now */
return (PostgresMain(ac, av, port->user));
}
/*
* ExitPostmaster -- cleanup
*
* Do NOT call exit() directly --- always go through here!
*/
void
ExitPostmaster(int status)
{
/* should cleanup shared memory and kill all backends */
/*
* Not sure of the semantics here. When the Postmaster dies, should
* the backends all be killed? probably not.
*
* MUST -- vadim 05-10-1999
*/
if (ServerSock_INET != INVALID_SOCK)
StreamClose(ServerSock_INET);
ServerSock_INET = INVALID_SOCK;
#ifdef HAVE_UNIX_SOCKETS
if (ServerSock_UNIX != INVALID_SOCK)
StreamClose(ServerSock_UNIX);
ServerSock_UNIX = INVALID_SOCK;
#endif
proc_exit(status);
}
/*
* sigusr1_handler - handle signal conditions from child processes
*/
static void
sigusr1_handler(SIGNAL_ARGS)
{
int save_errno = errno;
PG_SETMASK(&BlockSig);
if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT))
{
/*
* Request to schedule a checkpoint
*
* Ignore request if checkpoint is already running or checkpointing
* is currently disabled
*/
if (CheckPointPID == 0 && checkpointed &&
Shutdown == NoShutdown && !FatalError && random_seed != 0)
{
CheckPointPID = CheckPointDataBase();
/* note: if fork fails, CheckPointPID stays 0; nothing happens */
}
}
if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
{
/*
* Password or group file has changed.
*/
load_user();
load_group();
}
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_CHILDREN))
{
/*
* Send SIGUSR2 to all children (triggers AsyncNotifyHandler). See
* storage/ipc/sinvaladt.c for the use of this.
*/
if (Shutdown == NoShutdown)
SignalChildren(SIGUSR2);
}
PG_SETMASK(&UnBlockSig);
errno = save_errno;
}
/*
* Dummy signal handler
*
* We use this for signals that we don't actually use in the postmaster,
* but we do use in backends. If we SIG_IGN such signals in the postmaster,
* then a newly started backend might drop a signal that arrives before it's
* able to reconfigure its signal processing. (See notes in postgres.c.)
*/
static void
dummy_handler(SIGNAL_ARGS)
{
}
/*
* CharRemap: given an int in range 0..61, produce textual encoding of it
* per crypt(3) conventions.
*/
static char
CharRemap(long ch)
{
if (ch < 0)
ch = -ch;
ch = ch % 62;
if (ch < 26)
return 'A' + ch;
ch -= 26;
if (ch < 26)
return 'a' + ch;
ch -= 26;
return '0' + ch;
}
/*
* RandomSalt
*/
static void
RandomSalt(char *cryptSalt, char *md5Salt)
{
long rand = PostmasterRandom();
cryptSalt[0] = CharRemap(rand % 62);
cryptSalt[1] = CharRemap(rand / 62);
/*
* It's okay to reuse the first random value for one of the MD5 salt
* bytes, since only one of the two salts will be sent to the client.
* After that we need to compute more random bits.
*
* We use % 255, sacrificing one possible byte value, so as to ensure
* that all bits of the random() value participate in the result.
* While at it, add one to avoid generating any null bytes.
*/
md5Salt[0] = (rand % 255) + 1;
rand = PostmasterRandom();
md5Salt[1] = (rand % 255) + 1;
rand = PostmasterRandom();
md5Salt[2] = (rand % 255) + 1;
rand = PostmasterRandom();
md5Salt[3] = (rand % 255) + 1;
}
/*
* PostmasterRandom
*/
static long
PostmasterRandom(void)
{
static bool initialized = false;
if (!initialized)
{
Assert(random_seed != 0);
srandom(random_seed);
initialized = true;
}
return random();
}
/*
* Count up number of child processes.
*/
static int
CountChildren(void)
{
Dlelem *curr;
Backend *bp;
int cnt = 0;
for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
bp = (Backend *) DLE_VAL(curr);
if (bp->pid != MyProcPid)
cnt++;
}
if (CheckPointPID != 0)
cnt--;
return cnt;
}
/*
* Fire off a subprocess for startup/shutdown/checkpoint.
*
* Return value is subprocess' PID, or 0 if failed to start subprocess
* (0 is returned only for checkpoint case).
*/
static pid_t
SSDataBase(int xlop)
{
pid_t pid;
Backend *bn;
#ifdef LINUX_PROFILE
struct itimerval prof_itimer;
#endif
fflush(stdout);
fflush(stderr);
#ifdef LINUX_PROFILE
/* see comments in BackendStartup */
getitimer(ITIMER_PROF, &prof_itimer);
#endif
#ifdef __BEOS__
/* Specific beos actions before backend startup */
beos_before_backend_startup();
#endif
if ((pid = fork()) == 0) /* child */
{
const char *statmsg;
char *av[ARGV_SIZE * 2];
int ac = 0;
char nbbuf[ARGV_SIZE];
char dbbuf[ARGV_SIZE];
char xlbuf[ARGV_SIZE];
#ifdef LINUX_PROFILE
setitimer(ITIMER_PROF, &prof_itimer, NULL);
#endif
#ifdef __BEOS__
/* Specific beos actions after backend startup */
beos_backend_startup();
#endif
IsUnderPostmaster = true; /* we are a postmaster subprocess
* now */
/* Lose the postmaster's on-exit routines and port connections */
on_exit_reset();
/* Close the postmaster's sockets */
ClosePostmasterPorts(true);
/*
* Identify myself via ps
*/
switch (xlop)
{
case BS_XLOG_STARTUP:
statmsg = "startup subprocess";
break;
case BS_XLOG_CHECKPOINT:
statmsg = "checkpoint subprocess";
break;
case BS_XLOG_SHUTDOWN:
statmsg = "shutdown subprocess";
break;
default:
statmsg = "??? subprocess";
break;
}
init_ps_display(statmsg, "", "");
set_ps_display("");
/* Set up command-line arguments for subprocess */
av[ac++] = "postgres";
sprintf(nbbuf, "-B%d", NBuffers);
av[ac++] = nbbuf;
sprintf(xlbuf, "-x%d", xlop);
av[ac++] = xlbuf;
av[ac++] = "-p";
StrNCpy(dbbuf, "template1", ARGV_SIZE);
av[ac++] = dbbuf;
av[ac] = (char *) NULL;
BootstrapMain(ac, av);
ExitPostmaster(0);
}
/* in parent */
if (pid < 0)
{
#ifdef __BEOS__
/* Specific beos actions before backend startup */
beos_backend_startup_failed();
#endif
switch (xlop)
{
case BS_XLOG_STARTUP:
elog(LOG, "could not launch startup process (fork failure): %m");
break;
case BS_XLOG_CHECKPOINT:
elog(LOG, "could not launch checkpoint process (fork failure): %m");
break;
case BS_XLOG_SHUTDOWN:
elog(LOG, "could not launch shutdown process (fork failure): %m");
break;
default:
break;
}
/*
* fork failure is fatal during startup/shutdown, but there's no
* need to choke if a routine checkpoint fails.
*/
if (xlop == BS_XLOG_CHECKPOINT)
return 0;
ExitPostmaster(1);
}
/*
* The startup and shutdown processes are not considered normal
* backends, but the checkpoint process is. Checkpoint must be added
* to the list of backends.
*/
if (xlop == BS_XLOG_CHECKPOINT)
{
if (!(bn = (Backend *) malloc(sizeof(Backend))))
{
elog(LOG, "CheckPointDataBase: malloc failed");
ExitPostmaster(1);
}
bn->pid = pid;
bn->cancel_key = PostmasterRandom();
DLAddHead(BackendList, DLNewElem(bn));
/*
* Since this code is executed periodically, it's a fine place to
* do other actions that should happen every now and then on no
* particular schedule. Such as...
*/
TouchSocketLockFile();
}
return pid;
}
/*
* Create the opts file
*/
static bool
CreateOptsFile(int argc, char *argv[])
{
char fullprogname[MAXPGPATH];
char *filename;
FILE *fp;
unsigned i;
if (FindExec(fullprogname, argv[0], "postmaster") < 0)
return false;
filename = palloc(strlen(DataDir) + 17);
sprintf(filename, "%s/postmaster.opts", DataDir);
if ((fp = fopen(filename, "w")) == NULL)
{
postmaster_error("cannot create file %s: %s",
filename, strerror(errno));
return false;
}
fprintf(fp, "%s", fullprogname);
for (i = 1; i < argc; i++)
fprintf(fp, " '%s'", argv[i]);
fputs("\n", fp);
if (ferror(fp))
{
postmaster_error("writing file %s failed", filename);
fclose(fp);
return false;
}
fclose(fp);
return true;
}
/*
* This should be used only for reporting "interactive" errors (ie, errors
* during startup. Once the postmaster is launched, use elog.
*/
void
postmaster_error(const char *fmt,...)
{
va_list ap;
fprintf(stderr, "%s: ", progname);
va_start(ap, fmt);
vfprintf(stderr, gettext(fmt), ap);
va_end(ap);
fprintf(stderr, "\n");
}