/*-------------------------------------------------------------------------
 *
 * launch_backend.c
 *	  Functions for launching backends and other postmaster child
 *	  processes.
 *
 * On Unix systems, a new child process is launched with fork().  It inherits
 * all the global variables and data structures that had been initialized in
 * the postmaster.  After forking, the child process closes the file
 * descriptors that are not needed in the child process, and sets up the
 * mechanism to detect death of the parent postmaster process, etc.  After
 * that, it calls the right Main function depending on the kind of child
 * process.
 *
 * In EXEC_BACKEND mode, which is used on Windows but can be enabled on other
 * platforms for testing, the child process is launched by fork() + exec() (or
 * CreateProcess() on Windows).  It does not inherit the state from the
 * postmaster, so it needs to re-attach to the shared memory, re-initialize
 * global variables, reload the config file etc. to get the process to the
 * same state as after fork() on a Unix system.
 *
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/postmaster/launch_backend.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <unistd.h>

#include "access/xlog.h"
#include "common/file_utils.h"
#include "libpq/libpq-be.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/queryjumble.h"
#include "port.h"
#include "postmaster/autovacuum.h"
#include "postmaster/auxprocess.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/fork_process.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h"
#include "postmaster/startup.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "replication/walreceiver.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"
#include "utils/datetime.h"
#include "utils/guc.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"

#ifdef EXEC_BACKEND
#include "nodes/queryjumble.h"
#include "storage/pg_shmem.h"
#include "storage/spin.h"
#endif


#ifdef EXEC_BACKEND

/* Type for a socket that can be inherited to a client process */
#ifdef WIN32
typedef struct
{
	SOCKET		origsocket;		/* Original socket value, or PGINVALID_SOCKET
								 * if not a socket */
	WSAPROTOCOL_INFO wsainfo;
} InheritableSocket;
#else
typedef int InheritableSocket;
#endif

/*
 * Structure contains all variables passed to exec:ed backends
 *
 * The parent fills this in with save_backend_variables() and the child
 * copies the values back into its own globals with
 * restore_backend_variables().  The struct is transferred as a raw byte
 * image (temp file on non-Windows, file mapping on Windows).
 */
typedef struct
{
	/* client connection socket, valid only if has_client_sock is true */
	bool		has_client_sock;
	ClientSocket client_sock;
	InheritableSocket inh_sock;

	/* background worker description, valid only if has_bgworker is true */
	bool		has_bgworker;
	BackgroundWorker bgworker;

	char		DataDir[MAXPGPATH];
	int32		MyCancelKey;
	int			MyPMChildSlot;
#ifndef WIN32
	unsigned long UsedShmemSegID;
#else
	void	   *ShmemProtectiveRegion;
	HANDLE		UsedShmemSegID;
#endif
	void	   *UsedShmemSegAddr;
	slock_t    *ShmemLock;
	struct bkend *ShmemBackendArray;
#ifndef HAVE_SPINLOCKS
	PGSemaphore *SpinlockSemaArray;
#endif
	int			NamedLWLockTrancheRequests;
	NamedLWLockTranche *NamedLWLockTrancheArray;
	LWLockPadded *MainLWLockArray;
	slock_t    *ProcStructLock;
	PROC_HDR   *ProcGlobal;
	PGPROC	   *AuxiliaryProcs;
	PGPROC	   *PreparedXactProcs;
	PMSignalData *PMSignalState;
	pid_t		PostmasterPid;
	TimestampTz PgStartTime;
	TimestampTz PgReloadTime;
	pg_time_t	first_syslogger_file_time;
	bool		redirection_done;
	bool		IsBinaryUpgrade;
	bool		query_id_enabled;
	int			max_safe_fds;
	int			MaxBackends;
#ifdef WIN32
	HANDLE		PostmasterHandle;
	HANDLE		initial_signal_pipe;
	HANDLE		syslogPipe[2];
#else
	int			postmaster_alive_fds[2];
	int			syslogPipe[2];
#endif
	char		my_exec_path[MAXPGPATH];
	char		pkglib_path[MAXPGPATH];
} BackendParameters;

/*
 * NOTE(review): this macro refers to a "startup_data" member that
 * BackendParameters (as declared above) does not have, and nothing in this
 * file uses the macro.  Any use would fail to compile until such a member
 * is added; confirm whether the macro or the member is the leftover.
 */
#define SizeOfBackendParameters(startup_data_len) (offsetof(BackendParameters, startup_data) + (startup_data_len))

void		read_backend_variables(char *id, ClientSocket **client_sock, BackgroundWorker **worker);
static void restore_backend_variables(BackendParameters *param, ClientSocket **client_sock, BackgroundWorker **worker);

#ifndef WIN32
static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker);
#else
static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker,
								   HANDLE childProcess, pid_t childPid);
#endif

pid_t		internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker);

#ifndef WIN32

/*
 * internal_forkexec non-win32 implementation
 *
 * - writes out backend variables to the parameter file
 * - fork():s, and then exec():s the child process
 *
 * argv must have "--fork..." in argv[1], a NULL placeholder in argv[2]
 * (which is overwritten here with the parameter file name), and be
 * NULL-terminated at argv[argc].
 *
 * Returns the child's pid in the parent, or -1 on failure.  Failures other
 * than the fork itself are logged here; the parameter file is removed again
 * on every failure path detected in the parent, since no child will ever
 * consume it.
 */
pid_t
internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker)
{
	static unsigned long tmpBackendFileNum = 0;
	pid_t		pid;
	char		tmpfilename[MAXPGPATH];
	BackendParameters param;
	FILE	   *fp;

	/*
	 * Make sure padding bytes are initialized, to prevent Valgrind from
	 * complaining about writing uninitialized bytes to the file.  This isn't
	 * performance critical, and the win32 implementation initializes the
	 * padding bytes to zeros, so do it even when not using Valgrind.
	 */
	memset(&param, 0, sizeof(BackendParameters));

	if (!save_backend_variables(&param, client_sock, worker))
		return -1;				/* log made by save_backend_variables */

	/* Calculate name for temp file */
	snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
			 PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
			 MyProcPid, ++tmpBackendFileNum);

	/* Open file */
	fp = AllocateFile(tmpfilename, PG_BINARY_W);
	if (!fp)
	{
		/*
		 * As in OpenTemporaryFileInTablespace, try to make the temp-file
		 * directory, ignoring errors.
		 */
		(void) MakePGDirectory(PG_TEMP_FILES_DIR);

		fp = AllocateFile(tmpfilename, PG_BINARY_W);
		if (!fp)
		{
			ereport(LOG,
					(errcode_for_file_access(),
					 errmsg("could not create file \"%s\": %m",
							tmpfilename)));
			return -1;
		}
	}

	if (fwrite(&param, sizeof(param), 1, fp) != 1)
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m",
						tmpfilename)));
		FreeFile(fp);
		/* don't leave a useless partial file behind */
		(void) unlink(tmpfilename);
		return -1;
	}

	/* Release file */
	if (FreeFile(fp))
	{
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m",
						tmpfilename)));
		(void) unlink(tmpfilename);
		return -1;
	}

	/* Make sure caller set up argv properly */
	Assert(argc >= 3);
	Assert(argv[argc] == NULL);
	Assert(strncmp(argv[1], "--fork", 6) == 0);
	Assert(argv[2] == NULL);

	/* Insert temp file name after --fork argument */
	argv[2] = tmpfilename;

	/* Fire off execv in child */
	if ((pid = fork_process()) == 0)
	{
		if (execv(postgres_exec_path, argv) < 0)
		{
			ereport(LOG,
					(errmsg("could not execute server process \"%s\": %m",
							postgres_exec_path)));
			/* We're already in the child process here, can't return */
			exit(1);
		}
	}

	if (pid < 0)
	{
		/*
		 * No child was created, so nobody will read and remove the
		 * parameter file.  Preserve errno so that the caller can still
		 * report why the fork failed.
		 */
		int			save_errno = errno;

		(void) unlink(tmpfilename);
		errno = save_errno;
	}

	return pid;					/* Parent returns pid, or -1 on fork failure */
}
#else							/* WIN32 */

/*
 * internal_forkexec win32 implementation
 *
 * - starts backend using CreateProcess(), in suspended state
 * - writes out backend variables to the parameter file
 *	- during this, duplicates
handles and sockets required for * inheritance into the new process * - resumes execution of the new process once the backend parameter * file is complete. */ pid_t internal_forkexec(int argc, char *argv[], ClientSocket *client_sock, BackgroundWorker *worker) { int retry_count = 0; STARTUPINFO si; PROCESS_INFORMATION pi; int i; int j; char cmdLine[MAXPGPATH * 2]; HANDLE paramHandle; BackendParameters *param; SECURITY_ATTRIBUTES sa; char paramHandleStr[32]; /* Make sure caller set up argv properly */ Assert(argc >= 3); Assert(argv[argc] == NULL); Assert(strncmp(argv[1], "--fork", 6) == 0); Assert(argv[2] == NULL); /* Resume here if we need to retry */ retry: /* Set up shared memory for parameter passing */ ZeroMemory(&sa, sizeof(sa)); sa.nLength = sizeof(sa); sa.bInheritHandle = TRUE; paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE, &sa, PAGE_READWRITE, 0, sizeof(BackendParameters), NULL); if (paramHandle == INVALID_HANDLE_VALUE) { ereport(LOG, (errmsg("could not create backend parameter file mapping: error code %lu", GetLastError()))); return -1; } param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters)); if (!param) { ereport(LOG, (errmsg("could not map backend parameter memory: error code %lu", GetLastError()))); CloseHandle(paramHandle); return -1; } /* Insert temp file name after --fork argument */ #ifdef _WIN64 sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle); #else sprintf(paramHandleStr, "%lu", (DWORD) paramHandle); #endif argv[2] = paramHandleStr; /* Format the cmd line */ cmdLine[sizeof(cmdLine) - 1] = '\0'; cmdLine[sizeof(cmdLine) - 2] = '\0'; snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path); i = 0; while (argv[++i] != NULL) { j = strlen(cmdLine); snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]); } if (cmdLine[sizeof(cmdLine) - 2] != '\0') { ereport(LOG, (errmsg("subprocess command line too long"))); UnmapViewOfFile(param); CloseHandle(paramHandle); return -1; } memset(&pi, 0, 
sizeof(pi)); memset(&si, 0, sizeof(si)); si.cb = sizeof(si); /* * Create the subprocess in a suspended state. This will be resumed later, * once we have written out the parameter file. */ if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED, NULL, NULL, &si, &pi)) { ereport(LOG, (errmsg("CreateProcess() call failed: %m (error code %lu)", GetLastError()))); UnmapViewOfFile(param); CloseHandle(paramHandle); return -1; } if (!save_backend_variables(param, client_sock, worker, pi.hProcess, pi.dwProcessId)) { /* * log made by save_backend_variables, but we have to clean up the * mess with the half-started process */ if (!TerminateProcess(pi.hProcess, 255)) ereport(LOG, (errmsg_internal("could not terminate unstarted process: error code %lu", GetLastError()))); CloseHandle(pi.hProcess); CloseHandle(pi.hThread); UnmapViewOfFile(param); CloseHandle(paramHandle); return -1; /* log made by save_backend_variables */ } /* Drop the parameter shared memory that is now inherited to the backend */ if (!UnmapViewOfFile(param)) ereport(LOG, (errmsg("could not unmap view of backend parameter file: error code %lu", GetLastError()))); if (!CloseHandle(paramHandle)) ereport(LOG, (errmsg("could not close handle to backend parameter file: error code %lu", GetLastError()))); /* * Reserve the memory region used by our main shared memory segment before * we resume the child process. Normally this should succeed, but if ASLR * is active then it might sometimes fail due to the stack or heap having * gotten mapped into that range. In that case, just terminate the * process and retry. 
*/ if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess)) { /* pgwin32_ReserveSharedMemoryRegion already made a log entry */ if (!TerminateProcess(pi.hProcess, 255)) ereport(LOG, (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu", GetLastError()))); CloseHandle(pi.hProcess); CloseHandle(pi.hThread); if (++retry_count < 100) goto retry; ereport(LOG, (errmsg("giving up after too many tries to reserve shared memory"), errhint("This might be caused by ASLR or antivirus software."))); return -1; } /* * Now that the backend variables are written out, we start the child * thread so it can start initializing while we set up the rest of the * parent state. */ if (ResumeThread(pi.hThread) == -1) { if (!TerminateProcess(pi.hProcess, 255)) { ereport(LOG, (errmsg_internal("could not terminate unstartable process: error code %lu", GetLastError()))); CloseHandle(pi.hProcess); CloseHandle(pi.hThread); return -1; } CloseHandle(pi.hProcess); CloseHandle(pi.hThread); ereport(LOG, (errmsg_internal("could not resume thread of unstarted process: error code %lu", GetLastError()))); return -1; } /* Set up notification when the child process dies */ pgwin32_register_deadchild_callback(pi.hProcess, pi.dwProcessId); /* Don't close pi.hProcess, it's owned by the deadchild callback now */ CloseHandle(pi.hThread); return pi.dwProcessId; } #endif /* WIN32 */ /* * The following need to be available to the save/restore_backend_variables * functions. They are marked NON_EXEC_STATIC in their home modules. 
*/ extern slock_t *ShmemLock; extern slock_t *ProcStructLock; extern PGPROC *AuxiliaryProcs; extern PMSignalData *PMSignalState; extern pg_time_t first_syslogger_file_time; extern struct bkend *ShmemBackendArray; extern bool redirection_done; #ifndef WIN32 #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true) #define read_inheritable_socket(dest, src) (*(dest) = *(src)) #else static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child); static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childPid); static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src); #endif /* Save critical backend variables into the BackendParameters struct */ #ifndef WIN32 static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker) #else static bool save_backend_variables(BackendParameters *param, ClientSocket *client_sock, BackgroundWorker *worker, HANDLE childProcess, pid_t childPid) #endif { if (client_sock) { memcpy(¶m->client_sock, client_sock, sizeof(ClientSocket)); if (!write_inheritable_socket(¶m->inh_sock, client_sock->sock, childPid)) return false; param->has_client_sock = true; } else { memset(¶m->client_sock, 0, sizeof(ClientSocket)); param->has_client_sock = false; } if (worker) { memcpy(¶m->bgworker, worker, sizeof(BackgroundWorker)); param->has_bgworker = true; } else { memset(¶m->bgworker, 0, sizeof(BackgroundWorker)); param->has_bgworker = false; } strlcpy(param->DataDir, DataDir, MAXPGPATH); param->MyCancelKey = MyCancelKey; param->MyPMChildSlot = MyPMChildSlot; #ifdef WIN32 param->ShmemProtectiveRegion = ShmemProtectiveRegion; #endif param->UsedShmemSegID = UsedShmemSegID; param->UsedShmemSegAddr = UsedShmemSegAddr; param->ShmemLock = ShmemLock; param->ShmemBackendArray = ShmemBackendArray; #ifndef HAVE_SPINLOCKS param->SpinlockSemaArray = SpinlockSemaArray; #endif param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; 
param->NamedLWLockTrancheArray = NamedLWLockTrancheArray; param->MainLWLockArray = MainLWLockArray; param->ProcStructLock = ProcStructLock; param->ProcGlobal = ProcGlobal; param->AuxiliaryProcs = AuxiliaryProcs; param->PreparedXactProcs = PreparedXactProcs; param->PMSignalState = PMSignalState; param->PostmasterPid = PostmasterPid; param->PgStartTime = PgStartTime; param->PgReloadTime = PgReloadTime; param->first_syslogger_file_time = first_syslogger_file_time; param->redirection_done = redirection_done; param->IsBinaryUpgrade = IsBinaryUpgrade; param->query_id_enabled = query_id_enabled; param->max_safe_fds = max_safe_fds; param->MaxBackends = MaxBackends; #ifdef WIN32 param->PostmasterHandle = PostmasterHandle; if (!write_duplicated_handle(¶m->initial_signal_pipe, pgwin32_create_signal_listener(childPid), childProcess)) return false; #else memcpy(¶m->postmaster_alive_fds, &postmaster_alive_fds, sizeof(postmaster_alive_fds)); #endif memcpy(¶m->syslogPipe, &syslogPipe, sizeof(syslogPipe)); strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH); strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH); return true; } #ifdef WIN32 /* * Duplicate a handle for usage in a child process, and write the child * process instance of the handle to the parameter file. */ static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess) { HANDLE hChild = INVALID_HANDLE_VALUE; if (!DuplicateHandle(GetCurrentProcess(), src, childProcess, &hChild, 0, TRUE, DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS)) { ereport(LOG, (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu", GetLastError()))); return false; } *dest = hChild; return true; } /* * Duplicate a socket for usage in a child process, and write the resulting * structure to the parameter file. 
* This is required because a number of LSPs (Layered Service Providers) very * common on Windows (antivirus, firewalls, download managers etc) break * straight socket inheritance. */ static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid) { dest->origsocket = src; if (src != 0 && src != PGINVALID_SOCKET) { /* Actual socket */ if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0) { ereport(LOG, (errmsg("could not duplicate socket %d for use in backend: error code %d", (int) src, WSAGetLastError()))); return false; } } return true; } /* * Read a duplicate socket structure back, and get the socket descriptor. */ static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src) { SOCKET s; if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0) { /* Not a real socket! */ *dest = src->origsocket; } else { /* Actual socket, so create from structure */ s = WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO, &src->wsainfo, 0, 0); if (s == INVALID_SOCKET) { write_stderr("could not create inherited socket: error code %d\n", WSAGetLastError()); exit(1); } *dest = s; /* * To make sure we don't get two references to the same socket, close * the original one. (This would happen when inheritance actually * works.. 
*/ closesocket(src->origsocket); } } #endif void read_backend_variables(char *id, ClientSocket **client_sock, BackgroundWorker **worker) { BackendParameters param; #ifndef WIN32 /* Non-win32 implementation reads from file */ FILE *fp; /* Open file */ fp = AllocateFile(id, PG_BINARY_R); if (!fp) { write_stderr("could not open backend variables file \"%s\": %m\n", id); exit(1); } if (fread(¶m, sizeof(param), 1, fp) != 1) { write_stderr("could not read from backend variables file \"%s\": %m\n", id); exit(1); } /* Release file */ FreeFile(fp); if (unlink(id) != 0) { write_stderr("could not remove file \"%s\": %m\n", id); exit(1); } #else /* Win32 version uses mapped file */ HANDLE paramHandle; BackendParameters *paramp; #ifdef _WIN64 paramHandle = (HANDLE) _atoi64(id); #else paramHandle = (HANDLE) atol(id); #endif paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0); if (!paramp) { write_stderr("could not map view of backend variables: error code %lu\n", GetLastError()); exit(1); } memcpy(¶m, paramp, sizeof(BackendParameters)); if (!UnmapViewOfFile(paramp)) { write_stderr("could not unmap view of backend variables: error code %lu\n", GetLastError()); exit(1); } if (!CloseHandle(paramHandle)) { write_stderr("could not close handle to backend parameter variables: error code %lu\n", GetLastError()); exit(1); } #endif restore_backend_variables(¶m, client_sock, worker); } /* Restore critical backend variables from the BackendParameters struct */ static void restore_backend_variables(BackendParameters *param, ClientSocket **client_sock, BackgroundWorker **worker) { if (param->has_client_sock) { *client_sock = (ClientSocket *) MemoryContextAlloc(TopMemoryContext, sizeof(ClientSocket)); memcpy(*client_sock, ¶m->client_sock, sizeof(ClientSocket)); read_inheritable_socket(&(*client_sock)->sock, ¶m->inh_sock); } else *client_sock = NULL; if (param->has_bgworker) { *worker = (BackgroundWorker *) MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker)); memcpy(*worker, 
¶m->bgworker, sizeof(BackgroundWorker)); } else *worker = NULL; SetDataDir(param->DataDir); MyCancelKey = param->MyCancelKey; MyPMChildSlot = param->MyPMChildSlot; #ifdef WIN32 ShmemProtectiveRegion = param->ShmemProtectiveRegion; #endif UsedShmemSegID = param->UsedShmemSegID; UsedShmemSegAddr = param->UsedShmemSegAddr; ShmemLock = param->ShmemLock; ShmemBackendArray = param->ShmemBackendArray; #ifndef HAVE_SPINLOCKS SpinlockSemaArray = param->SpinlockSemaArray; #endif NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; NamedLWLockTrancheArray = param->NamedLWLockTrancheArray; MainLWLockArray = param->MainLWLockArray; ProcStructLock = param->ProcStructLock; ProcGlobal = param->ProcGlobal; AuxiliaryProcs = param->AuxiliaryProcs; PreparedXactProcs = param->PreparedXactProcs; PMSignalState = param->PMSignalState; PostmasterPid = param->PostmasterPid; PgStartTime = param->PgStartTime; PgReloadTime = param->PgReloadTime; first_syslogger_file_time = param->first_syslogger_file_time; redirection_done = param->redirection_done; IsBinaryUpgrade = param->IsBinaryUpgrade; query_id_enabled = param->query_id_enabled; max_safe_fds = param->max_safe_fds; MaxBackends = param->MaxBackends; #ifdef WIN32 PostmasterHandle = param->PostmasterHandle; pgwin32_initial_signal_pipe = param->initial_signal_pipe; #else memcpy(&postmaster_alive_fds, ¶m->postmaster_alive_fds, sizeof(postmaster_alive_fds)); #endif memcpy(&syslogPipe, ¶m->syslogPipe, sizeof(syslogPipe)); strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH); strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH); /* * We need to restore fd.c's counts of externally-opened FDs; to avoid * confusion, be sure to do this after restoring max_safe_fds. (Note: * BackendInitialize will handle this for (*client_sock)->sock.) */ #ifndef WIN32 if (postmaster_alive_fds[0] >= 0) ReserveExternalFD(); if (postmaster_alive_fds[1] >= 0) ReserveExternalFD(); #endif } #endif /* EXEC_BACKEND */