postgresql/src/backend/postmaster/launch_backend.c

812 lines
22 KiB
C

/*-------------------------------------------------------------------------
*
* launch_backend.c
* Functions for launching backends and other postmaster child
* processes.
*
* On Unix systems, a new child process is launched with fork(). It inherits
* all the global variables and data structures that had been initialized in
* the postmaster. After forking, the child process closes the file
* descriptors that are not needed in the child process, and sets up the
* mechanism to detect death of the parent postmaster process, etc. After
* that, it calls the right Main function depending on the kind of child
* process.
*
* In EXEC_BACKEND mode, which is used on Windows but can be enabled on other
* platforms for testing, the child process is launched by fork() + exec() (or
* CreateProcess() on Windows). It does not inherit the state from the
* postmaster, so it needs to re-attach to the shared memory, re-initialize
* global variables, reload the config file etc. to get the process to the
* same state as after fork() on a Unix system.
*
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/postmaster/launch_backend.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <unistd.h>
#include "access/xlog.h"
#include "common/file_utils.h"
#include "libpq/libpq-be.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "port.h"
#include "postmaster/autovacuum.h"
#include "postmaster/auxprocess.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/fork_process.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#include "postmaster/startup.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "replication/walreceiver.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/datetime.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"
#ifdef EXEC_BACKEND
#include "nodes/queryjumble.h"
#include "storage/pg_shmem.h"
#include "storage/spin.h"
#endif
#ifdef EXEC_BACKEND
/*
 * Type for a socket that can be inherited by a child process.
 *
 * On Windows the original socket value is stored together with a
 * WSAPROTOCOL_INFO record; write_inheritable_socket() fills the record in
 * with WSADuplicateSocket() and read_inheritable_socket() hands it to
 * WSASocket().  On other platforms the type is a plain int and the socket
 * value is simply copied.
 */
#ifdef WIN32
typedef struct
{
SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
* if not a socket */
WSAPROTOCOL_INFO wsainfo; /* only filled in when origsocket is a real socket */
} InheritableSocket;
#else
typedef int InheritableSocket;
#endif
/*
 * Structure contains all variables passed to exec:ed backends
 *
 * The launching process fills this in with save_backend_variables() and the
 * new child copies the values back into its own globals with
 * restore_backend_variables().  The struct travels as a raw byte image
 * (written to a temporary file on non-Windows platforms, copied through a
 * file mapping on Windows), so pointer members are transferred as plain
 * address values.
 *
 * Unless noted otherwise, each member mirrors the global variable of the
 * same name.
 */
typedef struct
{
/* true if client_sock and inh_sock were filled in from a caller-supplied socket */
bool has_client_sock;
ClientSocket client_sock;
InheritableSocket inh_sock;
/* true if bgworker was filled in from a caller-supplied worker */
bool has_bgworker;
BackgroundWorker bgworker;
char DataDir[MAXPGPATH];
int32 MyCancelKey;
int MyPMChildSlot;
#ifndef WIN32
unsigned long UsedShmemSegID;
#else
void *ShmemProtectiveRegion;
HANDLE UsedShmemSegID;
#endif
void *UsedShmemSegAddr;
slock_t *ShmemLock;
struct bkend *ShmemBackendArray;
#ifndef HAVE_SPINLOCKS
PGSemaphore *SpinlockSemaArray;
#endif
int NamedLWLockTrancheRequests;
NamedLWLockTranche *NamedLWLockTrancheArray;
LWLockPadded *MainLWLockArray;
slock_t *ProcStructLock;
PROC_HDR *ProcGlobal;
PGPROC *AuxiliaryProcs;
PGPROC *PreparedXactProcs;
PMSignalData *PMSignalState;
pid_t PostmasterPid;
TimestampTz PgStartTime;
TimestampTz PgReloadTime;
pg_time_t first_syslogger_file_time;
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
int max_safe_fds;
int MaxBackends;
#ifdef WIN32
HANDLE PostmasterHandle;
/*
 * Handle produced by write_duplicated_handle() for the child; restored into
 * pgwin32_initial_signal_pipe.
 */
HANDLE initial_signal_pipe;
HANDLE syslogPipe[2];
#else
int postmaster_alive_fds[2];
int syslogPipe[2];
#endif
char my_exec_path[MAXPGPATH];
char pkglib_path[MAXPGPATH];
} BackendParameters;
/*
 * Size in bytes of a BackendParameters image that carries startup_data_len
 * bytes of trailing startup data.
 *
 * The argument is parenthesized so that an expression argument (for example
 * a conditional expression) is added as a whole.
 *
 * NOTE(review): BackendParameters as declared in this file has no
 * "startup_data" member and nothing visible here expands this macro --
 * confirm whether the member is still to be added or the macro should go.
 */
#define SizeOfBackendParameters(startup_data_len) \
    (offsetof(BackendParameters, startup_data) + (startup_data_len))
/*
 * Forward declarations.
 *
 * read_backend_variables() and internal_forkexec() have external linkage;
 * the save/restore helpers are private to this file.
 */

/* Child side: load the parameter image named by "id" and restore the globals */
void read_backend_variables(char *id, ClientSocket **client_sock, BackgroundWorker **worker);
static void restore_backend_variables(BackendParameters *param, ClientSocket **client_sock, BackgroundWorker **worker);
/*
 * Parent side: fill in a BackendParameters struct.  The Windows variant
 * additionally takes the child's process handle and pid, which it needs for
 * duplicating handles and sockets into the child.
 */
#ifndef WIN32
static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker);
#else
static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker,
HANDLE childProcess, pid_t childPid);
#endif
/* Launch a child process; returns its pid, or -1 on failure */
pid_t internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker);
#ifndef WIN32
/*
 * internal_forkexec non-win32 implementation
 *
 * - writes out backend variables to the parameter file
 * - fork():s, and then exec():s the child process
 *
 * The caller must have set up argv so that argv[1] starts with "--fork",
 * argv[2] is a NULL placeholder and argv[argc] is NULL; the name of the
 * parameter file is stored into argv[2].
 *
 * Returns the child's pid in the parent, or -1 on failure.  Failures to
 * collect or write the parameters are logged here.  If fork_process() fails,
 * errno is left as fork_process() set it, in case the caller reports it.
 *
 * The parameter file is normally removed by the child once it has been read
 * (see read_backend_variables()).  On every failure path where no child is
 * going to consume it, it is removed here instead so that it does not linger
 * in the temp-file directory.
 */
pid_t
internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker)
{
    static unsigned long tmpBackendFileNum = 0;
    pid_t       pid;
    char        tmpfilename[MAXPGPATH];
    BackendParameters param;
    FILE       *fp;

    /*
     * Make sure padding bytes are initialized, to prevent Valgrind from
     * complaining about writing uninitialized bytes to the file.  This isn't
     * performance critical, and the win32 implementation initializes the
     * padding bytes to zeros, so do it even when not using Valgrind.
     */
    memset(&param, 0, sizeof(BackendParameters));

    if (!save_backend_variables(&param, client_sock, worker))
        return -1;              /* log made by save_backend_variables */

    /* Calculate name for temp file */
    snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
             PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
             MyProcPid, ++tmpBackendFileNum);

    /* Open file */
    fp = AllocateFile(tmpfilename, PG_BINARY_W);
    if (!fp)
    {
        /*
         * As in OpenTemporaryFileInTablespace, try to make the temp-file
         * directory, ignoring errors.
         */
        (void) MakePGDirectory(PG_TEMP_FILES_DIR);

        fp = AllocateFile(tmpfilename, PG_BINARY_W);
        if (!fp)
        {
            ereport(LOG,
                    (errcode_for_file_access(),
                     errmsg("could not create file \"%s\": %m",
                            tmpfilename)));
            return -1;
        }
    }

    if (fwrite(&param, sizeof(param), 1, fp) != 1)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not write to file \"%s\": %m", tmpfilename)));
        FreeFile(fp);
        /* nobody will read the incomplete file; don't leave it behind */
        (void) unlink(tmpfilename);
        return -1;
    }

    /* Release file */
    if (FreeFile(fp))
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not write to file \"%s\": %m", tmpfilename)));
        (void) unlink(tmpfilename);
        return -1;
    }

    /* Make sure caller set up argv properly */
    Assert(argc >= 3);
    Assert(argv[argc] == NULL);
    Assert(strncmp(argv[1], "--fork", 6) == 0);
    Assert(argv[2] == NULL);

    /* Insert temp file name after --fork argument */
    argv[2] = tmpfilename;

    /* Fire off execv in child */
    pid = fork_process();
    if (pid == 0)
    {
        if (execv(postgres_exec_path, argv) < 0)
        {
            ereport(LOG,
                    (errmsg("could not execute server process \"%s\": %m",
                            postgres_exec_path)));
            /* the new program never ran, so it cannot remove the file */
            (void) unlink(tmpfilename);
            /* We're already in the child process here, can't return */
            exit(1);
        }
    }
    else if (pid < 0)
    {
        /* No child exists to consume the file; keep fork's errno intact */
        int         save_errno = errno;

        (void) unlink(tmpfilename);
        errno = save_errno;
    }

    return pid;                 /* Parent returns pid, or -1 on fork failure */
}
#else /* WIN32 */
/*
 * internal_forkexec win32 implementation
 *
 * - starts backend using CreateProcess(), in suspended state
 * - writes out backend variables to the parameter file mapping
 *	- during this, duplicates handles and sockets required for
 *	  inheritance into the new process
 * - resumes execution of the new process once the backend parameter
 *	 file is complete.
 *
 * The caller must have set up argv so that argv[1] starts with "--fork",
 * argv[2] is a NULL placeholder and argv[argc] is NULL; the decimal value of
 * the inheritable mapping handle is stored into argv[2].
 *
 * Returns the child's process id, or -1 on failure (already logged).
 */
pid_t
internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker)
{
    int         retry_count = 0;
    STARTUPINFO si;
    PROCESS_INFORMATION pi;
    int         i;
    int         j;
    char        cmdLine[MAXPGPATH * 2];
    HANDLE      paramHandle;
    BackendParameters *param;
    SECURITY_ATTRIBUTES sa;
    char        paramHandleStr[32];

    /* Make sure caller set up argv properly */
    Assert(argc >= 3);
    Assert(argv[argc] == NULL);
    Assert(strncmp(argv[1], "--fork", 6) == 0);
    Assert(argv[2] == NULL);

    /* Resume here if we need to retry */
retry:

    /* Set up shared memory for parameter passing */
    ZeroMemory(&sa, sizeof(sa));
    sa.nLength = sizeof(sa);
    sa.bInheritHandle = TRUE;
    paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
                                    &sa,
                                    PAGE_READWRITE,
                                    0,
                                    sizeof(BackendParameters),
                                    NULL);

    /*
     * CreateFileMapping() reports failure by returning NULL, not
     * INVALID_HANDLE_VALUE.
     */
    if (paramHandle == NULL)
    {
        ereport(LOG,
                (errmsg("could not create backend parameter file mapping: error code %lu",
                        GetLastError())));
        return -1;
    }

    param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
    if (!param)
    {
        ereport(LOG,
                (errmsg("could not map backend parameter memory: error code %lu",
                        GetLastError())));
        CloseHandle(paramHandle);
        return -1;
    }

    /* Insert the mapping handle's value after --fork argument */
#ifdef _WIN64
    sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
#else
    sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
#endif
    argv[2] = paramHandleStr;

    /*
     * Format the cmd line.  The last two bytes are pre-zeroed and snprintf
     * is never allowed to touch the final one, so a non-zero second-to-last
     * byte afterwards means the command line filled the buffer.
     */
    cmdLine[sizeof(cmdLine) - 1] = '\0';
    cmdLine[sizeof(cmdLine) - 2] = '\0';
    snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
    i = 0;
    while (argv[++i] != NULL)
    {
        j = strlen(cmdLine);
        snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
    }
    if (cmdLine[sizeof(cmdLine) - 2] != '\0')
    {
        ereport(LOG,
                (errmsg("subprocess command line too long")));
        UnmapViewOfFile(param);
        CloseHandle(paramHandle);
        return -1;
    }

    memset(&pi, 0, sizeof(pi));
    memset(&si, 0, sizeof(si));
    si.cb = sizeof(si);

    /*
     * Create the subprocess in a suspended state. This will be resumed later,
     * once we have written out the parameter file.
     */
    if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
                       NULL, NULL, &si, &pi))
    {
        ereport(LOG,
                (errmsg("CreateProcess() call failed: %m (error code %lu)",
                        GetLastError())));
        UnmapViewOfFile(param);
        CloseHandle(paramHandle);
        return -1;
    }

    if (!save_backend_variables(param, client_sock, worker, pi.hProcess, pi.dwProcessId))
    {
        /*
         * log made by save_backend_variables, but we have to clean up the
         * mess with the half-started process
         */
        if (!TerminateProcess(pi.hProcess, 255))
            ereport(LOG,
                    (errmsg_internal("could not terminate unstarted process: error code %lu",
                                     GetLastError())));
        CloseHandle(pi.hProcess);
        CloseHandle(pi.hThread);
        UnmapViewOfFile(param);
        CloseHandle(paramHandle);
        return -1;              /* log made by save_backend_variables */
    }

    /* Drop the parameter shared memory that is now inherited to the backend */
    if (!UnmapViewOfFile(param))
        ereport(LOG,
                (errmsg("could not unmap view of backend parameter file: error code %lu",
                        GetLastError())));
    if (!CloseHandle(paramHandle))
        ereport(LOG,
                (errmsg("could not close handle to backend parameter file: error code %lu",
                        GetLastError())));

    /*
     * Reserve the memory region used by our main shared memory segment before
     * we resume the child process.  Normally this should succeed, but if ASLR
     * is active then it might sometimes fail due to the stack or heap having
     * gotten mapped into that range.  In that case, just terminate the
     * process and retry.
     */
    if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
    {
        /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
        if (!TerminateProcess(pi.hProcess, 255))
            ereport(LOG,
                    (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
                                     GetLastError())));
        CloseHandle(pi.hProcess);
        CloseHandle(pi.hThread);
        if (++retry_count < 100)
            goto retry;
        ereport(LOG,
                (errmsg("giving up after too many tries to reserve shared memory"),
                 errhint("This might be caused by ASLR or antivirus software.")));
        return -1;
    }

    /*
     * Now that the backend variables are written out, we start the child
     * thread so it can start initializing while we set up the rest of the
     * parent state.
     */
    if (ResumeThread(pi.hThread) == -1)
    {
        /*
         * Capture the reason right away: the cleanup calls below may
         * overwrite the thread's last-error value before we report it.
         */
        DWORD       resume_error = GetLastError();

        if (!TerminateProcess(pi.hProcess, 255))
        {
            ereport(LOG,
                    (errmsg_internal("could not terminate unstartable process: error code %lu",
                                     GetLastError())));
            CloseHandle(pi.hProcess);
            CloseHandle(pi.hThread);
            return -1;
        }
        CloseHandle(pi.hProcess);
        CloseHandle(pi.hThread);
        ereport(LOG,
                (errmsg_internal("could not resume thread of unstarted process: error code %lu",
                                 resume_error)));
        return -1;
    }

    /* Set up notification when the child process dies */
    pgwin32_register_deadchild_callback(pi.hProcess, pi.dwProcessId);

    /* Don't close pi.hProcess, it's owned by the deadchild callback now */

    CloseHandle(pi.hThread);

    return pi.dwProcessId;
}
#endif /* WIN32 */
/*
 * The following need to be available to the save/restore_backend_variables
 * functions. They are marked NON_EXEC_STATIC in their home modules.
 *
 * Each of them is copied into BackendParameters by save_backend_variables()
 * and copied back by restore_backend_variables().
 */
extern slock_t *ShmemLock;
extern slock_t *ProcStructLock;
extern PGPROC *AuxiliaryProcs;
extern PMSignalData *PMSignalState;
extern pg_time_t first_syslogger_file_time;
extern struct bkend *ShmemBackendArray;
extern bool redirection_done;
/*
 * Helpers for passing a socket to the child.
 *
 * On non-Windows platforms InheritableSocket is a plain int, so "writing"
 * and "reading" are simple assignments done by macros.  The write macro
 * always yields true and never evaluates its childpid argument; that is what
 * lets save_backend_variables() pass "childPid" even in builds where it has
 * no such parameter.
 *
 * On Windows these are real functions, defined further down.
 */
#ifndef WIN32
#define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
#define read_inheritable_socket(dest, src) (*(dest) = *(src))
#else
static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
pid_t childPid);
static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
#endif
/*
 * Save critical backend variables into the BackendParameters struct
 *
 * client_sock and worker may each be NULL, in which case the corresponding
 * member is zeroed and its has_* flag is cleared.  On Windows, childProcess
 * and childPid identify the not-yet-running child that handles and sockets
 * are duplicated into.
 *
 * Returns false if a socket or handle could not be duplicated (the helper
 * that failed has already logged it); true otherwise.
 */
#ifndef WIN32
static bool
save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker)
#else
static bool
save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker,
                       HANDLE childProcess, pid_t childPid)
#endif
{
    /* Client connection, if the caller supplied one */
    if (client_sock == NULL)
    {
        memset(&param->client_sock, 0, sizeof(ClientSocket));
        param->has_client_sock = false;
    }
    else
    {
        memcpy(&param->client_sock, client_sock, sizeof(ClientSocket));
        if (!write_inheritable_socket(&param->inh_sock, client_sock->sock, childPid))
            return false;
        param->has_client_sock = true;
    }

    /* Background worker definition, if the caller supplied one */
    if (worker == NULL)
    {
        memset(&param->bgworker, 0, sizeof(BackgroundWorker));
        param->has_bgworker = false;
    }
    else
    {
        memcpy(&param->bgworker, worker, sizeof(BackgroundWorker));
        param->has_bgworker = true;
    }

    /* Everything below is a straight copy of the same-named global */
    strlcpy(param->DataDir, DataDir, MAXPGPATH);

    param->MyCancelKey = MyCancelKey;
    param->MyPMChildSlot = MyPMChildSlot;

#ifdef WIN32
    param->ShmemProtectiveRegion = ShmemProtectiveRegion;
#endif
    param->UsedShmemSegID = UsedShmemSegID;
    param->UsedShmemSegAddr = UsedShmemSegAddr;

    param->ShmemLock = ShmemLock;
    param->ShmemBackendArray = ShmemBackendArray;

#ifndef HAVE_SPINLOCKS
    param->SpinlockSemaArray = SpinlockSemaArray;
#endif
    param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
    param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
    param->MainLWLockArray = MainLWLockArray;
    param->ProcStructLock = ProcStructLock;
    param->ProcGlobal = ProcGlobal;
    param->AuxiliaryProcs = AuxiliaryProcs;
    param->PreparedXactProcs = PreparedXactProcs;
    param->PMSignalState = PMSignalState;

    param->PostmasterPid = PostmasterPid;
    param->PgStartTime = PgStartTime;
    param->PgReloadTime = PgReloadTime;
    param->first_syslogger_file_time = first_syslogger_file_time;

    param->redirection_done = redirection_done;
    param->IsBinaryUpgrade = IsBinaryUpgrade;
    param->query_id_enabled = query_id_enabled;
    param->max_safe_fds = max_safe_fds;

    param->MaxBackends = MaxBackends;

#ifdef WIN32
    param->PostmasterHandle = PostmasterHandle;

    /* The child gets its own duplicate of a freshly created signal pipe */
    if (!write_duplicated_handle(&param->initial_signal_pipe,
                                 pgwin32_create_signal_listener(childPid),
                                 childProcess))
        return false;
#else
    memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds,
           sizeof(postmaster_alive_fds));
#endif

    memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));

    strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
    strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);

    return true;
}
#ifdef WIN32
/*
 * Duplicate a handle into the child process and store the child's instance
 * of the handle in *dest (a slot in the parameter file).
 *
 * The duplicate is inheritable and has the same access as the source.
 * DUPLICATE_CLOSE_SOURCE is requested, so the caller must not use src
 * afterwards.
 *
 * Returns true on success; on failure logs the error, leaves *dest
 * untouched and returns false.
 */
static bool
write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
{
    HANDLE      child_handle = INVALID_HANDLE_VALUE;
    BOOL        duplicated;

    duplicated = DuplicateHandle(GetCurrentProcess(),
                                 src,
                                 childProcess,
                                 &child_handle,
                                 0,
                                 TRUE,
                                 DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS);
    if (duplicated)
    {
        *dest = child_handle;
        return true;
    }

    ereport(LOG,
            (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
                             GetLastError())));
    return false;
}
/*
 * Prepare a socket for use in a child process, and write the resulting
 * structure to the parameter file.
 *
 * Plain handle inheritance is not relied upon because a number of LSPs
 * (Layered Service Providers) that are very common on Windows (antivirus,
 * firewalls, download managers etc) break it.
 *
 * The original socket value is always recorded.  Protocol info is only
 * generated when src is an actual socket, i.e. neither 0 nor
 * PGINVALID_SOCKET.
 *
 * Returns true on success; logs and returns false if duplication fails.
 */
static bool
write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
{
    dest->origsocket = src;

    /* Not an actual socket: there is nothing to duplicate */
    if (src == 0 || src == PGINVALID_SOCKET)
        return true;

    if (WSADuplicateSocket(src, childpid, &dest->wsainfo) == 0)
        return true;

    ereport(LOG,
            (errmsg("could not duplicate socket %d for use in backend: error code %d",
                    (int) src, WSAGetLastError())));
    return false;
}
/*
 * Read a duplicate socket structure back, and get the socket descriptor.
 *
 * If the saved value is not a real socket (0 or PGINVALID_SOCKET) it is
 * passed through unchanged.  Otherwise a socket is created from the saved
 * protocol info; if that fails, a message is written to stderr and the
 * process exits with status 1.
 */
static void
read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
{
    SOCKET      recreated;

    /* Not a real socket! */
    if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
    {
        *dest = src->origsocket;
        return;
    }

    /* Actual socket, so create from structure */
    recreated = WSASocket(FROM_PROTOCOL_INFO,
                          FROM_PROTOCOL_INFO,
                          FROM_PROTOCOL_INFO,
                          &src->wsainfo,
                          0,
                          0);
    if (recreated == INVALID_SOCKET)
    {
        write_stderr("could not create inherited socket: error code %d\n",
                     WSAGetLastError());
        exit(1);
    }
    *dest = recreated;

    /*
     * To make sure we don't get two references to the same socket, close
     * the original one.  (This would happen when inheritance actually
     * works.)
     */
    closesocket(src->origsocket);
}
#endif
/*
 * Load the BackendParameters image prepared by the launching process and
 * restore this process's global variables from it.
 *
 * "id" is the name of the parameter file on non-Windows platforms, or the
 * decimal value of an inherited file-mapping handle on Windows.  The file is
 * removed (or the mapping released) once its contents have been copied.
 *
 * *client_sock and *worker are set as described for
 * restore_backend_variables().  Any failure is reported on stderr and the
 * process exits with status 1.
 */
void
read_backend_variables(char *id, ClientSocket **client_sock, BackgroundWorker **worker)
{
    BackendParameters param;

#ifndef WIN32
    /* Non-win32 implementation reads from file */
    FILE       *varfile;

    /* Open file */
    varfile = AllocateFile(id, PG_BINARY_R);
    if (varfile == NULL)
    {
        write_stderr("could not open backend variables file \"%s\": %m\n", id);
        exit(1);
    }

    if (fread(&param, sizeof(param), 1, varfile) != 1)
    {
        write_stderr("could not read from backend variables file \"%s\": %m\n", id);
        exit(1);
    }

    /* Release file */
    FreeFile(varfile);

    /* The image has been read into memory, so the file can go now */
    if (unlink(id) != 0)
    {
        write_stderr("could not remove file \"%s\": %m\n", id);
        exit(1);
    }
#else
    /* Win32 version uses mapped file */
    HANDLE      paramHandle;
    BackendParameters *mapped;

    /* "id" carries the numeric value of the mapping handle */
#ifdef _WIN64
    paramHandle = (HANDLE) _atoi64(id);
#else
    paramHandle = (HANDLE) atol(id);
#endif

    mapped = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
    if (mapped == NULL)
    {
        write_stderr("could not map view of backend variables: error code %lu\n",
                     GetLastError());
        exit(1);
    }

    /* Take a private copy so the view and the handle can be released */
    memcpy(&param, mapped, sizeof(BackendParameters));

    if (!UnmapViewOfFile(mapped))
    {
        write_stderr("could not unmap view of backend variables: error code %lu\n",
                     GetLastError());
        exit(1);
    }

    if (!CloseHandle(paramHandle))
    {
        write_stderr("could not close handle to backend parameter variables: error code %lu\n",
                     GetLastError());
        exit(1);
    }
#endif

    restore_backend_variables(&param, client_sock, worker);
}
/*
 * Restore critical backend variables from the BackendParameters struct
 *
 * *client_sock receives a copy of the saved ClientSocket allocated in
 * TopMemoryContext, or NULL if none was saved; likewise *worker for the
 * saved BackgroundWorker.  All other members are copied back into the
 * same-named globals.
 */
static void
restore_backend_variables(BackendParameters *param, ClientSocket **client_sock, BackgroundWorker **worker)
{
    /* Client connection */
    if (!param->has_client_sock)
        *client_sock = NULL;
    else
    {
        ClientSocket *sock_copy;

        sock_copy = (ClientSocket *) MemoryContextAlloc(TopMemoryContext, sizeof(ClientSocket));
        *client_sock = sock_copy;
        memcpy(sock_copy, &param->client_sock, sizeof(ClientSocket));
        read_inheritable_socket(&sock_copy->sock, &param->inh_sock);
    }

    /* Background worker definition */
    if (!param->has_bgworker)
        *worker = NULL;
    else
    {
        BackgroundWorker *worker_copy;

        worker_copy = (BackgroundWorker *)
            MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
        *worker = worker_copy;
        memcpy(worker_copy, &param->bgworker, sizeof(BackgroundWorker));
    }

    SetDataDir(param->DataDir);

    MyCancelKey = param->MyCancelKey;
    MyPMChildSlot = param->MyPMChildSlot;

#ifdef WIN32
    ShmemProtectiveRegion = param->ShmemProtectiveRegion;
#endif
    UsedShmemSegID = param->UsedShmemSegID;
    UsedShmemSegAddr = param->UsedShmemSegAddr;

    ShmemLock = param->ShmemLock;
    ShmemBackendArray = param->ShmemBackendArray;

#ifndef HAVE_SPINLOCKS
    SpinlockSemaArray = param->SpinlockSemaArray;
#endif
    NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
    NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
    MainLWLockArray = param->MainLWLockArray;
    ProcStructLock = param->ProcStructLock;
    ProcGlobal = param->ProcGlobal;
    AuxiliaryProcs = param->AuxiliaryProcs;
    PreparedXactProcs = param->PreparedXactProcs;
    PMSignalState = param->PMSignalState;

    PostmasterPid = param->PostmasterPid;
    PgStartTime = param->PgStartTime;
    PgReloadTime = param->PgReloadTime;
    first_syslogger_file_time = param->first_syslogger_file_time;

    redirection_done = param->redirection_done;
    IsBinaryUpgrade = param->IsBinaryUpgrade;
    query_id_enabled = param->query_id_enabled;
    max_safe_fds = param->max_safe_fds;

    MaxBackends = param->MaxBackends;

#ifdef WIN32
    PostmasterHandle = param->PostmasterHandle;
    pgwin32_initial_signal_pipe = param->initial_signal_pipe;
#else
    memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds,
           sizeof(postmaster_alive_fds));
#endif

    memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));

    strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
    strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);

    /*
     * fd.c's count of externally-opened FDs has to be rebuilt in this
     * process.  Do it only now that max_safe_fds has been restored, to
     * avoid confusion.  (Note: BackendInitialize will handle this for
     * (*client_sock)->sock.)
     */
#ifndef WIN32
    if (postmaster_alive_fds[0] >= 0)
        ReserveExternalFD();
    if (postmaster_alive_fds[1] >= 0)
        ReserveExternalFD();
#endif
}
#endif /* EXEC_BACKEND */