Use ResourceOwner to track WaitEventSets.

A WaitEventSet holds file descriptors or event handles (on Windows).
If FreeWaitEventSet is not called, those fds or handles are leaked.
Use ResourceOwners to track WaitEventSets, to clean those up
automatically on error.

This was a live bug in async Append nodes when an FDW's
ForeignAsyncRequest function failed. (In back branches, I will apply a
more localized fix for that based on PG_TRY-PG_FINALLY.)

The added test doesn't check for leaking resources, so it passed even
before this commit. But at least it covers the code path.

In passing, fix the misleading comment on what the 'nevents' argument
to WaitEventSetWait means.

Report by Alexander Lakhin, analysis and suggestion for the fix by
Tom Lane. Fixes bug #17828.

Reviewed-by: Alexander Lakhin, Thomas Munro
Discussion: https://www.postgresql.org/message-id/472235.1678387869@sss.pgh.pa.us
This commit is contained in:
Heikki Linnakangas 2023-11-23 13:31:36 +02:00
parent 414e75540f
commit 50c67c2019
9 changed files with 84 additions and 13 deletions

View File

@ -10809,6 +10809,13 @@ SELECT * FROM result_tbl ORDER BY a;
(2 rows)
DELETE FROM result_tbl;
-- Test error handling, if accessing one of the foreign partitions errors out
CREATE FOREIGN TABLE async_p_broken PARTITION OF async_pt FOR VALUES FROM (10000) TO (10001)
SERVER loopback OPTIONS (table_name 'non_existent_table');
SELECT * FROM async_pt;
ERROR: relation "public.non_existent_table" does not exist
CONTEXT: remote SQL command: SELECT a, b, c FROM public.non_existent_table
DROP FOREIGN TABLE async_p_broken;
-- Check case where multiple partitions use the same connection
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000)

View File

@ -3607,6 +3607,12 @@ INSERT INTO result_tbl SELECT a, b, 'AAA' || c FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
DELETE FROM result_tbl;
-- Test error handling, if accessing one of the foreign partitions errors out
CREATE FOREIGN TABLE async_p_broken PARTITION OF async_pt FOR VALUES FROM (10000) TO (10001)
SERVER loopback OPTIONS (table_name 'non_existent_table');
SELECT * FROM async_pt;
DROP FOREIGN TABLE async_p_broken;
-- Check case where multiple partitions use the same connection
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000)

View File

@ -1025,7 +1025,8 @@ ExecAppendAsyncEventWait(AppendState *node)
/* We should never be called when there are no valid async subplans. */
Assert(node->as_nasyncremain > 0);
node->as_eventset = CreateWaitEventSet(CurrentMemoryContext, nevents);
Assert(node->as_eventset == NULL);
node->as_eventset = CreateWaitEventSet(CurrentResourceOwner, nevents);
AddWaitEventToSet(node->as_eventset, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
NULL, NULL);
@ -1050,7 +1051,7 @@ ExecAppendAsyncEventWait(AppendState *node)
return;
}
/* We wait on at most EVENT_BUFFER_SIZE events. */
/* Return at most EVENT_BUFFER_SIZE events in one call. */
if (nevents > EVENT_BUFFER_SIZE)
nevents = EVENT_BUFFER_SIZE;

View File

@ -207,7 +207,7 @@ pq_init(void)
elog(FATAL, "fcntl(F_SETFD) failed on socket: %m");
#endif
FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, FeBeWaitSetNEvents);
FeBeWaitSet = CreateWaitEventSet(NULL, FeBeWaitSetNEvents);
socket_pos = AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE,
MyProcPort->sock, NULL, NULL);
latch_pos = AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,

View File

@ -1695,7 +1695,7 @@ ConfigurePostmasterWaitSet(bool accept_connections)
FreeWaitEventSet(pm_wait_set);
pm_wait_set = NULL;
pm_wait_set = CreateWaitEventSet(CurrentMemoryContext,
pm_wait_set = CreateWaitEventSet(NULL,
accept_connections ? (1 + NumListenSockets) : 1);
AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
NULL);

View File

@ -311,7 +311,7 @@ SysLoggerMain(int argc, char *argv[])
* syslog pipe, which implies that all other backends have exited
* (including the postmaster).
*/
wes = CreateWaitEventSet(CurrentMemoryContext, 2);
wes = CreateWaitEventSet(NULL, 2);
AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
#ifndef WIN32
AddWaitEventToSet(wes, WL_SOCKET_READABLE, syslogPipe[0], NULL, NULL);

View File

@ -62,6 +62,7 @@
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "utils/memutils.h"
#include "utils/resowner.h"
/*
* Select the fd readiness primitive to use. Normally the "most modern"
@ -101,6 +102,8 @@
/* typedef in latch.h */
struct WaitEventSet
{
ResourceOwner owner;
int nevents; /* number of registered events */
int nevents_space; /* maximum number of events in this set */
@ -195,6 +198,31 @@ static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
WaitEvent *occurred_events, int nevents);
/* ResourceOwner support to hold WaitEventSets */
static void ResOwnerReleaseWaitEventSet(Datum res);
/*
 * ResourceOwner descriptor for WaitEventSets.
 *
 * The convenience wrappers in this file pass it to ResourceOwnerRemember()
 * and ResourceOwnerForget(); it names ResOwnerReleaseWaitEventSet() as the
 * callback that releases a tracked set.
 */
static const ResourceOwnerDesc wait_event_set_resowner_desc =
{
.name = "WaitEventSet",
.release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
.release_priority = RELEASE_PRIO_WAITEVENTSETS,
.ReleaseResource = ResOwnerReleaseWaitEventSet,
.DebugPrint = NULL	/* no custom debug-print callback is supplied */
};
/* Convenience wrappers over ResourceOwnerRemember/Forget */
/*
 * Record 'set' in 'owner', using wait_event_set_resowner_desc as the
 * resource descriptor.  The pointer is passed as a Datum.
 *
 * The caller in this file, CreateWaitEventSet(), calls
 * ResourceOwnerEnlarge() on the owner before getting here.
 */
static inline void
ResourceOwnerRememberWaitEventSet(ResourceOwner owner, WaitEventSet *set)
{
ResourceOwnerRemember(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
}
/*
 * Counterpart of ResourceOwnerRememberWaitEventSet(): drop the record of
 * 'set' from 'owner', using the same resource descriptor.
 *
 * FreeWaitEventSet() calls this for sets that still have an owner.
 */
static inline void
ResourceOwnerForgetWaitEventSet(ResourceOwner owner, WaitEventSet *set)
{
ResourceOwnerForget(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
}
/*
* Initialize the process-local latch infrastructure.
*
@ -323,7 +351,7 @@ InitializeLatchWaitSet(void)
Assert(LatchWaitSet == NULL);
/* Set up the WaitEventSet used by WaitLatch(). */
LatchWaitSet = CreateWaitEventSet(TopMemoryContext, 2);
LatchWaitSet = CreateWaitEventSet(NULL, 2);
latch_pos = AddWaitEventToSet(LatchWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
MyLatch, NULL);
if (IsUnderPostmaster)
@ -541,7 +569,7 @@ WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock,
int ret = 0;
int rc;
WaitEvent event;
WaitEventSet *set = CreateWaitEventSet(CurrentMemoryContext, 3);
WaitEventSet *set = CreateWaitEventSet(CurrentResourceOwner, 3);
if (wakeEvents & WL_TIMEOUT)
Assert(timeout >= 0);
@ -716,9 +744,12 @@ ResetLatch(Latch *latch)
*
* These events can then be efficiently waited upon together, using
* WaitEventSetWait().
*
* The WaitEventSet is tracked by the given 'resowner'. Use NULL for session
* lifetime.
*/
WaitEventSet *
CreateWaitEventSet(MemoryContext context, int nevents)
CreateWaitEventSet(ResourceOwner resowner, int nevents)
{
WaitEventSet *set;
char *data;
@ -744,7 +775,10 @@ CreateWaitEventSet(MemoryContext context, int nevents)
sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
#endif
data = (char *) MemoryContextAllocZero(context, sz);
if (resowner != NULL)
ResourceOwnerEnlarge(resowner);
data = (char *) MemoryContextAllocZero(TopMemoryContext, sz);
set = (WaitEventSet *) data;
data += MAXALIGN(sizeof(WaitEventSet));
@ -770,6 +804,12 @@ CreateWaitEventSet(MemoryContext context, int nevents)
set->nevents_space = nevents;
set->exit_on_postmaster_death = false;
if (resowner != NULL)
{
ResourceOwnerRememberWaitEventSet(resowner, set);
set->owner = resowner;
}
#if defined(WAIT_USE_EPOLL)
if (!AcquireExternalFD())
{
@ -834,6 +874,12 @@ CreateWaitEventSet(MemoryContext context, int nevents)
void
FreeWaitEventSet(WaitEventSet *set)
{
if (set->owner)
{
ResourceOwnerForgetWaitEventSet(set->owner, set);
set->owner = NULL;
}
#if defined(WAIT_USE_EPOLL)
close(set->epoll_fd);
ReleaseExternalFD();
@ -841,9 +887,7 @@ FreeWaitEventSet(WaitEventSet *set)
close(set->kqueue_fd);
ReleaseExternalFD();
#elif defined(WAIT_USE_WIN32)
WaitEvent *cur_event;
for (cur_event = set->events;
for (WaitEvent *cur_event = set->events;
cur_event < (set->events + set->nevents);
cur_event++)
{
@ -2300,3 +2344,13 @@ drain(void)
}
#endif
/*
 * ResourceOwner release callback for WaitEventSets.
 *
 * 'res' is the Datum-wrapped WaitEventSet pointer that was handed to
 * ResourceOwnerRemember().  The set is freed via FreeWaitEventSet().
 */
static void
ResOwnerReleaseWaitEventSet(Datum res)
{
	WaitEventSet *wes = (WaitEventSet *) DatumGetPointer(res);

	/* Only sets that were registered with an owner are expected here. */
	Assert(wes->owner != NULL);

	/*
	 * Clear the owner link before freeing: FreeWaitEventSet() calls
	 * ResourceOwnerForgetWaitEventSet() whenever the link is still set, and
	 * that call is skipped on this path.
	 */
	wes->owner = NULL;

	FreeWaitEventSet(wes);
}

View File

@ -102,6 +102,8 @@
#include <signal.h>
#include "utils/resowner.h"
/*
* Latch structure should be treated as opaque and only accessed through
* the public functions. It is defined here to allow embedding Latches as
@ -173,7 +175,7 @@ extern void SetLatch(Latch *latch);
extern void ResetLatch(Latch *latch);
extern void ShutdownLatchSupport(void);
extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents);
extern WaitEventSet *CreateWaitEventSet(ResourceOwner resowner, int nevents);
extern void FreeWaitEventSet(WaitEventSet *set);
extern void FreeWaitEventSetAfterFork(WaitEventSet *set);
extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd,

View File

@ -74,6 +74,7 @@ typedef uint32 ResourceReleasePriority;
#define RELEASE_PRIO_TUPDESC_REFS 400
#define RELEASE_PRIO_SNAPSHOT_REFS 500
#define RELEASE_PRIO_FILES 600
#define RELEASE_PRIO_WAITEVENTSETS 700
/* 0 is considered invalid */
#define RELEASE_PRIO_FIRST 1