/*------------------------------------------------------------------------- * * dsm.c * manage dynamic shared memory segments * * This file provides a set of services to make programming with dynamic * shared memory segments more convenient. Unlike the low-level * facilities provided by dsm_impl.h and dsm_impl.c, mappings and segments * created using this module will be cleaned up automatically. Mappings * will be removed when the resource owner under which they were created * is cleaned up, unless dsm_pin_mapping() is used, in which case they * have session lifespan. Segments will be removed when there are no * remaining mappings, or at postmaster shutdown in any case. After a * hard postmaster crash, remaining segments will be removed, if they * still exist, at the next postmaster startup. * * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/storage/ipc/dsm.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include #include #ifndef WIN32 #include #endif #include #include "common/pg_prng.h" #include "lib/ilist.h" #include "miscadmin.h" #include "port/pg_bitutils.h" #include "storage/dsm.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pg_shmem.h" #include "storage/shmem.h" #include "utils/freepage.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" #define PG_DYNSHMEM_CONTROL_MAGIC 0x9a503d32 #define PG_DYNSHMEM_FIXED_SLOTS 64 #define PG_DYNSHMEM_SLOTS_PER_BACKEND 5 #define INVALID_CONTROL_SLOT ((uint32) -1) /* Backend-local tracking for on-detach callbacks. */ typedef struct dsm_segment_detach_callback { on_dsm_detach_callback function; Datum arg; slist_node node; } dsm_segment_detach_callback; /* Backend-local state for a dynamic shared memory segment. */ struct dsm_segment { dlist_node node; /* List link in dsm_segment_list. */ ResourceOwner resowner; /* Resource owner. */ dsm_handle handle; /* Segment name. */ uint32 control_slot; /* Slot in control segment. */ void *impl_private; /* Implementation-specific private data. */ void *mapped_address; /* Mapping address, or NULL if unmapped. */ Size mapped_size; /* Size of our mapping. */ slist_head on_detach; /* On-detach callbacks. */ }; /* Shared-memory state for a dynamic shared memory segment. */ typedef struct dsm_control_item { dsm_handle handle; uint32 refcnt; /* 2+ = active, 1 = moribund, 0 = gone */ size_t first_page; size_t npages; void *impl_private_pm_handle; /* only needed on Windows */ bool pinned; } dsm_control_item; /* Layout of the dynamic shared memory control segment. */ typedef struct dsm_control_header { uint32 magic; uint32 nitems; uint32 maxitems; dsm_control_item item[FLEXIBLE_ARRAY_MEMBER]; } dsm_control_header; static void dsm_cleanup_for_mmap(void); static void dsm_postmaster_shutdown(int code, Datum arg); static dsm_segment *dsm_create_descriptor(void); static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size); static uint64 dsm_control_bytes_needed(uint32 nitems); static inline dsm_handle make_main_region_dsm_handle(int slot); static inline bool is_main_region_dsm_handle(dsm_handle handle); /* Has this backend initialized the dynamic shared memory system yet? */ static bool dsm_init_done = false; /* Preallocated DSM space in the main shared memory region. */ static void *dsm_main_space_begin = NULL; /* * List of dynamic shared memory segments used by this backend. * * At process exit time, we must decrement the reference count of each * segment we have attached; this list makes it possible to find all such * segments. * * This list should always be empty in the postmaster. We could probably * allow the postmaster to map dynamic shared memory segments before it * begins to start child processes, provided that each process adjusted * the reference counts for those segments in the control segment at * startup time, but there's no obvious need for such a facility, which * would also be complex to handle in the EXEC_BACKEND case. Once the * postmaster has begun spawning children, there's an additional problem: * each new mapping would require an update to the control segment, * which requires locking, in which the postmaster must not be involved. */ static dlist_head dsm_segment_list = DLIST_STATIC_INIT(dsm_segment_list); /* * Control segment information. * * Unlike ordinary shared memory segments, the control segment is not * reference counted; instead, it lasts for the postmaster's entire * life cycle. For simplicity, it doesn't have a dsm_segment object either. */ static dsm_handle dsm_control_handle; static dsm_control_header *dsm_control; static Size dsm_control_mapped_size = 0; static void *dsm_control_impl_private = NULL; /* ResourceOwner callbacks to hold DSM segments */ static void ResOwnerReleaseDSM(Datum res); static char *ResOwnerPrintDSM(Datum res); static const ResourceOwnerDesc dsm_resowner_desc = { .name = "dynamic shared memory segment", .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS, .release_priority = RELEASE_PRIO_DSMS, .ReleaseResource = ResOwnerReleaseDSM, .DebugPrint = ResOwnerPrintDSM }; /* Convenience wrappers over ResourceOwnerRemember/Forget */ static inline void ResourceOwnerRememberDSM(ResourceOwner owner, dsm_segment *seg) { ResourceOwnerRemember(owner, PointerGetDatum(seg), &dsm_resowner_desc); } static inline void ResourceOwnerForgetDSM(ResourceOwner owner, dsm_segment *seg) { ResourceOwnerForget(owner, PointerGetDatum(seg), &dsm_resowner_desc); } /* * Start up the dynamic shared memory system. * * This is called just once during each cluster lifetime, at postmaster * startup time. */ void dsm_postmaster_startup(PGShmemHeader *shim) { void *dsm_control_address = NULL; uint32 maxitems; Size segsize; Assert(!IsUnderPostmaster); /* * If we're using the mmap implementations, clean up any leftovers. * Cleanup isn't needed on Windows, and happens earlier in startup for * POSIX and System V shared memory, via a direct call to * dsm_cleanup_using_control_segment. */ if (dynamic_shared_memory_type == DSM_IMPL_MMAP) dsm_cleanup_for_mmap(); /* Determine size for new control segment. */ maxitems = PG_DYNSHMEM_FIXED_SLOTS + PG_DYNSHMEM_SLOTS_PER_BACKEND * MaxBackends; elog(DEBUG2, "dynamic shared memory system will support %u segments", maxitems); segsize = dsm_control_bytes_needed(maxitems); /* * Loop until we find an unused identifier for the new control segment. We * sometimes use DSM_HANDLE_INVALID as a sentinel value indicating "no * control segment", so avoid generating that value for a real handle. */ for (;;) { Assert(dsm_control_address == NULL); Assert(dsm_control_mapped_size == 0); /* Use even numbers only */ dsm_control_handle = pg_prng_uint32(&pg_global_prng_state) << 1; if (dsm_control_handle == DSM_HANDLE_INVALID) continue; if (dsm_impl_op(DSM_OP_CREATE, dsm_control_handle, segsize, &dsm_control_impl_private, &dsm_control_address, &dsm_control_mapped_size, ERROR)) break; } dsm_control = dsm_control_address; on_shmem_exit(dsm_postmaster_shutdown, PointerGetDatum(shim)); elog(DEBUG2, "created dynamic shared memory control segment %u (%zu bytes)", dsm_control_handle, segsize); shim->dsm_control = dsm_control_handle; /* Initialize control segment. */ dsm_control->magic = PG_DYNSHMEM_CONTROL_MAGIC; dsm_control->nitems = 0; dsm_control->maxitems = maxitems; } /* * Determine whether the control segment from the previous postmaster * invocation still exists. If so, remove the dynamic shared memory * segments to which it refers, and then the control segment itself. */ void dsm_cleanup_using_control_segment(dsm_handle old_control_handle) { void *mapped_address = NULL; void *junk_mapped_address = NULL; void *impl_private = NULL; void *junk_impl_private = NULL; Size mapped_size = 0; Size junk_mapped_size = 0; uint32 nitems; uint32 i; dsm_control_header *old_control; /* * Try to attach the segment. If this fails, it probably just means that * the operating system has been rebooted and the segment no longer * exists, or an unrelated process has used the same shm ID. So just fall * out quietly. */ if (!dsm_impl_op(DSM_OP_ATTACH, old_control_handle, 0, &impl_private, &mapped_address, &mapped_size, DEBUG1)) return; /* * We've managed to reattach it, but the contents might not be sane. If * they aren't, we disregard the segment after all. */ old_control = (dsm_control_header *) mapped_address; if (!dsm_control_segment_sane(old_control, mapped_size)) { dsm_impl_op(DSM_OP_DETACH, old_control_handle, 0, &impl_private, &mapped_address, &mapped_size, LOG); return; } /* * OK, the control segment looks basically valid, so we can use it to get * a list of segments that need to be removed. */ nitems = old_control->nitems; for (i = 0; i < nitems; ++i) { dsm_handle handle; uint32 refcnt; /* If the reference count is 0, the slot is actually unused. */ refcnt = old_control->item[i].refcnt; if (refcnt == 0) continue; /* If it was using the main shmem area, there is nothing to do. */ handle = old_control->item[i].handle; if (is_main_region_dsm_handle(handle)) continue; /* Log debugging information. */ elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u (reference count %u)", handle, refcnt); /* Destroy the referenced segment. */ dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, &junk_mapped_address, &junk_mapped_size, LOG); } /* Destroy the old control segment, too. */ elog(DEBUG2, "cleaning up dynamic shared memory control segment with ID %u", old_control_handle); dsm_impl_op(DSM_OP_DESTROY, old_control_handle, 0, &impl_private, &mapped_address, &mapped_size, LOG); } /* * When we're using the mmap shared memory implementation, "shared memory" * segments might even manage to survive an operating system reboot. * But there's no guarantee as to exactly what will survive: some segments * may survive, and others may not, and the contents of some may be out * of date. In particular, the control segment may be out of date, so we * can't rely on it to figure out what to remove. However, since we know * what directory contains the files we used as shared memory, we can simply * scan the directory and blow everything away that shouldn't be there. */ static void dsm_cleanup_for_mmap(void) { DIR *dir; struct dirent *dent; /* Scan the directory for something with a name of the correct format. */ dir = AllocateDir(PG_DYNSHMEM_DIR); while ((dent = ReadDir(dir, PG_DYNSHMEM_DIR)) != NULL) { if (strncmp(dent->d_name, PG_DYNSHMEM_MMAP_FILE_PREFIX, strlen(PG_DYNSHMEM_MMAP_FILE_PREFIX)) == 0) { char buf[MAXPGPATH + sizeof(PG_DYNSHMEM_DIR)]; snprintf(buf, sizeof(buf), PG_DYNSHMEM_DIR "/%s", dent->d_name); elog(DEBUG2, "removing file \"%s\"", buf); /* We found a matching file; so remove it. */ if (unlink(buf) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", buf))); } } /* Cleanup complete. */ FreeDir(dir); } /* * At shutdown time, we iterate over the control segment and remove all * remaining dynamic shared memory segments. We avoid throwing errors here; * the postmaster is shutting down either way, and this is just non-critical * resource cleanup. */ static void dsm_postmaster_shutdown(int code, Datum arg) { uint32 nitems; uint32 i; void *dsm_control_address; void *junk_mapped_address = NULL; void *junk_impl_private = NULL; Size junk_mapped_size = 0; PGShmemHeader *shim = (PGShmemHeader *) DatumGetPointer(arg); /* * If some other backend exited uncleanly, it might have corrupted the * control segment while it was dying. In that case, we warn and ignore * the contents of the control segment. This may end up leaving behind * stray shared memory segments, but there's not much we can do about that * if the metadata is gone. */ nitems = dsm_control->nitems; if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size)) { ereport(LOG, (errmsg("dynamic shared memory control segment is corrupt"))); return; } /* Remove any remaining segments. */ for (i = 0; i < nitems; ++i) { dsm_handle handle; /* If the reference count is 0, the slot is actually unused. */ if (dsm_control->item[i].refcnt == 0) continue; handle = dsm_control->item[i].handle; if (is_main_region_dsm_handle(handle)) continue; /* Log debugging information. */ elog(DEBUG2, "cleaning up orphaned dynamic shared memory with ID %u", handle); /* Destroy the segment. */ dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, &junk_mapped_address, &junk_mapped_size, LOG); } /* Remove the control segment itself. */ elog(DEBUG2, "cleaning up dynamic shared memory control segment with ID %u", dsm_control_handle); dsm_control_address = dsm_control; dsm_impl_op(DSM_OP_DESTROY, dsm_control_handle, 0, &dsm_control_impl_private, &dsm_control_address, &dsm_control_mapped_size, LOG); dsm_control = dsm_control_address; shim->dsm_control = 0; } /* * Prepare this backend for dynamic shared memory usage. Under EXEC_BACKEND, * we must reread the state file and map the control segment; in other cases, * we'll have inherited the postmaster's mapping and global variables. */ static void dsm_backend_startup(void) { #ifdef EXEC_BACKEND if (IsUnderPostmaster) { void *control_address = NULL; /* Attach control segment. */ Assert(dsm_control_handle != 0); dsm_impl_op(DSM_OP_ATTACH, dsm_control_handle, 0, &dsm_control_impl_private, &control_address, &dsm_control_mapped_size, ERROR); dsm_control = control_address; /* If control segment doesn't look sane, something is badly wrong. */ if (!dsm_control_segment_sane(dsm_control, dsm_control_mapped_size)) { dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0, &dsm_control_impl_private, &control_address, &dsm_control_mapped_size, WARNING); ereport(FATAL, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("dynamic shared memory control segment is not valid"))); } } #endif dsm_init_done = true; } #ifdef EXEC_BACKEND /* * When running under EXEC_BACKEND, we get a callback here when the main * shared memory segment is re-attached, so that we can record the control * handle retrieved from it. */ void dsm_set_control_handle(dsm_handle h) { Assert(dsm_control_handle == 0 && h != 0); dsm_control_handle = h; } #endif /* * Reserve some space in the main shared memory segment for DSM segments. */ size_t dsm_estimate_size(void) { return 1024 * 1024 * (size_t) min_dynamic_shared_memory; } /* * Initialize space in the main shared memory segment for DSM segments. */ void dsm_shmem_init(void) { size_t size = dsm_estimate_size(); bool found; if (size == 0) return; dsm_main_space_begin = ShmemInitStruct("Preallocated DSM", size, &found); if (!found) { FreePageManager *fpm = (FreePageManager *) dsm_main_space_begin; size_t first_page = 0; size_t pages; /* Reserve space for the FreePageManager. */ while (first_page * FPM_PAGE_SIZE < sizeof(FreePageManager)) ++first_page; /* Initialize it and give it all the rest of the space. */ FreePageManagerInitialize(fpm, dsm_main_space_begin); pages = (size / FPM_PAGE_SIZE) - first_page; FreePageManagerPut(fpm, first_page, pages); } } /* * Create a new dynamic shared memory segment. * * If there is a non-NULL CurrentResourceOwner, the new segment is associated * with it and must be detached before the resource owner releases, or a * warning will be logged. If CurrentResourceOwner is NULL, the segment * remains attached until explicitly detached or the session ends. * Creating with a NULL CurrentResourceOwner is equivalent to creating * with a non-NULL CurrentResourceOwner and then calling dsm_pin_mapping. */ dsm_segment * dsm_create(Size size, int flags) { dsm_segment *seg; uint32 i; uint32 nitems; size_t npages = 0; size_t first_page = 0; FreePageManager *dsm_main_space_fpm = dsm_main_space_begin; bool using_main_dsm_region = false; /* * Unsafe in postmaster. It might seem pointless to allow use of dsm in * single user mode, but otherwise some subsystems will need dedicated * single user mode code paths. */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); if (!dsm_init_done) dsm_backend_startup(); /* Create a new segment descriptor. */ seg = dsm_create_descriptor(); /* * Lock the control segment while we try to allocate from the main shared * memory area, if configured. */ if (dsm_main_space_fpm) { npages = size / FPM_PAGE_SIZE; if (size % FPM_PAGE_SIZE > 0) ++npages; LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); if (FreePageManagerGet(dsm_main_space_fpm, npages, &first_page)) { /* We can carve out a piece of the main shared memory segment. */ seg->mapped_address = (char *) dsm_main_space_begin + first_page * FPM_PAGE_SIZE; seg->mapped_size = npages * FPM_PAGE_SIZE; using_main_dsm_region = true; /* We'll choose a handle below. */ } } if (!using_main_dsm_region) { /* * We need to create a new memory segment. Loop until we find an * unused segment identifier. */ if (dsm_main_space_fpm) LWLockRelease(DynamicSharedMemoryControlLock); for (;;) { Assert(seg->mapped_address == NULL && seg->mapped_size == 0); /* Use even numbers only */ seg->handle = pg_prng_uint32(&pg_global_prng_state) << 1; if (seg->handle == DSM_HANDLE_INVALID) /* Reserve sentinel */ continue; if (dsm_impl_op(DSM_OP_CREATE, seg->handle, size, &seg->impl_private, &seg->mapped_address, &seg->mapped_size, ERROR)) break; } LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); } /* Search the control segment for an unused slot. */ nitems = dsm_control->nitems; for (i = 0; i < nitems; ++i) { if (dsm_control->item[i].refcnt == 0) { if (using_main_dsm_region) { seg->handle = make_main_region_dsm_handle(i); dsm_control->item[i].first_page = first_page; dsm_control->item[i].npages = npages; } else Assert(!is_main_region_dsm_handle(seg->handle)); dsm_control->item[i].handle = seg->handle; /* refcnt of 1 triggers destruction, so start at 2 */ dsm_control->item[i].refcnt = 2; dsm_control->item[i].impl_private_pm_handle = NULL; dsm_control->item[i].pinned = false; seg->control_slot = i; LWLockRelease(DynamicSharedMemoryControlLock); return seg; } } /* Verify that we can support an additional mapping. */ if (nitems >= dsm_control->maxitems) { if (using_main_dsm_region) FreePageManagerPut(dsm_main_space_fpm, first_page, npages); LWLockRelease(DynamicSharedMemoryControlLock); if (!using_main_dsm_region) dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private, &seg->mapped_address, &seg->mapped_size, WARNING); if (seg->resowner != NULL) ResourceOwnerForgetDSM(seg->resowner, seg); dlist_delete(&seg->node); pfree(seg); if ((flags & DSM_CREATE_NULL_IF_MAXSEGMENTS) != 0) return NULL; ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_RESOURCES), errmsg("too many dynamic shared memory segments"))); } /* Enter the handle into a new array slot. */ if (using_main_dsm_region) { seg->handle = make_main_region_dsm_handle(nitems); dsm_control->item[i].first_page = first_page; dsm_control->item[i].npages = npages; } dsm_control->item[nitems].handle = seg->handle; /* refcnt of 1 triggers destruction, so start at 2 */ dsm_control->item[nitems].refcnt = 2; dsm_control->item[nitems].impl_private_pm_handle = NULL; dsm_control->item[nitems].pinned = false; seg->control_slot = nitems; dsm_control->nitems++; LWLockRelease(DynamicSharedMemoryControlLock); return seg; } /* * Attach a dynamic shared memory segment. * * See comments for dsm_segment_handle() for an explanation of how this * is intended to be used. * * This function will return NULL if the segment isn't known to the system. * This can happen if we're asked to attach the segment, but then everyone * else detaches it (causing it to be destroyed) before we get around to * attaching it. * * If there is a non-NULL CurrentResourceOwner, the attached segment is * associated with it and must be detached before the resource owner releases, * or a warning will be logged. Otherwise the segment remains attached until * explicitly detached or the session ends. See the note atop dsm_create(). */ dsm_segment * dsm_attach(dsm_handle h) { dsm_segment *seg; dlist_iter iter; uint32 i; uint32 nitems; /* Unsafe in postmaster (and pointless in a stand-alone backend). */ Assert(IsUnderPostmaster); if (!dsm_init_done) dsm_backend_startup(); /* * Since this is just a debugging cross-check, we could leave it out * altogether, or include it only in assert-enabled builds. But since the * list of attached segments should normally be very short, let's include * it always for right now. * * If you're hitting this error, you probably want to attempt to find an * existing mapping via dsm_find_mapping() before calling dsm_attach() to * create a new one. */ dlist_foreach(iter, &dsm_segment_list) { seg = dlist_container(dsm_segment, node, iter.cur); if (seg->handle == h) elog(ERROR, "can't attach the same segment more than once"); } /* Create a new segment descriptor. */ seg = dsm_create_descriptor(); seg->handle = h; /* Bump reference count for this segment in shared memory. */ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); nitems = dsm_control->nitems; for (i = 0; i < nitems; ++i) { /* * If the reference count is 0, the slot is actually unused. If the * reference count is 1, the slot is still in use, but the segment is * in the process of going away; even if the handle matches, another * slot may already have started using the same handle value by * coincidence so we have to keep searching. */ if (dsm_control->item[i].refcnt <= 1) continue; /* If the handle doesn't match, it's not the slot we want. */ if (dsm_control->item[i].handle != seg->handle) continue; /* Otherwise we've found a match. */ dsm_control->item[i].refcnt++; seg->control_slot = i; if (is_main_region_dsm_handle(seg->handle)) { seg->mapped_address = (char *) dsm_main_space_begin + dsm_control->item[i].first_page * FPM_PAGE_SIZE; seg->mapped_size = dsm_control->item[i].npages * FPM_PAGE_SIZE; } break; } LWLockRelease(DynamicSharedMemoryControlLock); /* * If we didn't find the handle we're looking for in the control segment, * it probably means that everyone else who had it mapped, including the * original creator, died before we got to this point. It's up to the * caller to decide what to do about that. */ if (seg->control_slot == INVALID_CONTROL_SLOT) { dsm_detach(seg); return NULL; } /* Here's where we actually try to map the segment. */ if (!is_main_region_dsm_handle(seg->handle)) dsm_impl_op(DSM_OP_ATTACH, seg->handle, 0, &seg->impl_private, &seg->mapped_address, &seg->mapped_size, ERROR); return seg; } /* * At backend shutdown time, detach any segments that are still attached. * (This is similar to dsm_detach_all, except that there's no reason to * unmap the control segment before exiting, so we don't bother.) */ void dsm_backend_shutdown(void) { while (!dlist_is_empty(&dsm_segment_list)) { dsm_segment *seg; seg = dlist_head_element(dsm_segment, node, &dsm_segment_list); dsm_detach(seg); } } /* * Detach all shared memory segments, including the control segments. This * should be called, along with PGSharedMemoryDetach, in processes that * might inherit mappings but are not intended to be connected to dynamic * shared memory. */ void dsm_detach_all(void) { void *control_address = dsm_control; while (!dlist_is_empty(&dsm_segment_list)) { dsm_segment *seg; seg = dlist_head_element(dsm_segment, node, &dsm_segment_list); dsm_detach(seg); } if (control_address != NULL) dsm_impl_op(DSM_OP_DETACH, dsm_control_handle, 0, &dsm_control_impl_private, &control_address, &dsm_control_mapped_size, ERROR); } /* * Detach from a shared memory segment, destroying the segment if we * remove the last reference. * * This function should never fail. It will often be invoked when aborting * a transaction, and a further error won't serve any purpose. It's not a * complete disaster if we fail to unmap or destroy the segment; it means a * resource leak, but that doesn't necessarily preclude further operations. */ void dsm_detach(dsm_segment *seg) { /* * Invoke registered callbacks. Just in case one of those callbacks * throws a further error that brings us back here, pop the callback * before invoking it, to avoid infinite error recursion. Don't allow * interrupts while running the individual callbacks in non-error code * paths, to avoid leaving cleanup work unfinished if we're interrupted by * a statement timeout or similar. */ HOLD_INTERRUPTS(); while (!slist_is_empty(&seg->on_detach)) { slist_node *node; dsm_segment_detach_callback *cb; on_dsm_detach_callback function; Datum arg; node = slist_pop_head_node(&seg->on_detach); cb = slist_container(dsm_segment_detach_callback, node, node); function = cb->function; arg = cb->arg; pfree(cb); function(seg, arg); } RESUME_INTERRUPTS(); /* * Try to remove the mapping, if one exists. Normally, there will be, but * maybe not, if we failed partway through a create or attach operation. * We remove the mapping before decrementing the reference count so that * the process that sees a zero reference count can be certain that no * remaining mappings exist. Even if this fails, we pretend that it * works, because retrying is likely to fail in the same way. */ if (seg->mapped_address != NULL) { if (!is_main_region_dsm_handle(seg->handle)) dsm_impl_op(DSM_OP_DETACH, seg->handle, 0, &seg->impl_private, &seg->mapped_address, &seg->mapped_size, WARNING); seg->impl_private = NULL; seg->mapped_address = NULL; seg->mapped_size = 0; } /* Reduce reference count, if we previously increased it. */ if (seg->control_slot != INVALID_CONTROL_SLOT) { uint32 refcnt; uint32 control_slot = seg->control_slot; LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); Assert(dsm_control->item[control_slot].handle == seg->handle); Assert(dsm_control->item[control_slot].refcnt > 1); refcnt = --dsm_control->item[control_slot].refcnt; seg->control_slot = INVALID_CONTROL_SLOT; LWLockRelease(DynamicSharedMemoryControlLock); /* If new reference count is 1, try to destroy the segment. */ if (refcnt == 1) { /* A pinned segment should never reach 1. */ Assert(!dsm_control->item[control_slot].pinned); /* * If we fail to destroy the segment here, or are killed before we * finish doing so, the reference count will remain at 1, which * will mean that nobody else can attach to the segment. At * postmaster shutdown time, or when a new postmaster is started * after a hard kill, another attempt will be made to remove the * segment. * * The main case we're worried about here is being killed by a * signal before we can finish removing the segment. In that * case, it's important to be sure that the segment still gets * removed. If we actually fail to remove the segment for some * other reason, the postmaster may not have any better luck than * we did. There's not much we can do about that, though. */ if (is_main_region_dsm_handle(seg->handle) || dsm_impl_op(DSM_OP_DESTROY, seg->handle, 0, &seg->impl_private, &seg->mapped_address, &seg->mapped_size, WARNING)) { LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); if (is_main_region_dsm_handle(seg->handle)) FreePageManagerPut((FreePageManager *) dsm_main_space_begin, dsm_control->item[control_slot].first_page, dsm_control->item[control_slot].npages); Assert(dsm_control->item[control_slot].handle == seg->handle); Assert(dsm_control->item[control_slot].refcnt == 1); dsm_control->item[control_slot].refcnt = 0; LWLockRelease(DynamicSharedMemoryControlLock); } } } /* Clean up our remaining backend-private data structures. */ if (seg->resowner != NULL) ResourceOwnerForgetDSM(seg->resowner, seg); dlist_delete(&seg->node); pfree(seg); } /* * Keep a dynamic shared memory mapping until end of session. * * By default, mappings are owned by the current resource owner, which * typically means they stick around for the duration of the current query * only. */ void dsm_pin_mapping(dsm_segment *seg) { if (seg->resowner != NULL) { ResourceOwnerForgetDSM(seg->resowner, seg); seg->resowner = NULL; } } /* * Arrange to remove a dynamic shared memory mapping at cleanup time. * * dsm_pin_mapping() can be used to preserve a mapping for the entire * lifetime of a process; this function reverses that decision, making * the segment owned by the current resource owner. This may be useful * just before performing some operation that will invalidate the segment * for future use by this backend. */ void dsm_unpin_mapping(dsm_segment *seg) { Assert(seg->resowner == NULL); ResourceOwnerEnlarge(CurrentResourceOwner); seg->resowner = CurrentResourceOwner; ResourceOwnerRememberDSM(seg->resowner, seg); } /* * Keep a dynamic shared memory segment until postmaster shutdown, or until * dsm_unpin_segment is called. * * This function should not be called more than once per segment, unless the * segment is explicitly unpinned with dsm_unpin_segment in between calls. * * Note that this function does not arrange for the current process to * keep the segment mapped indefinitely; if that behavior is desired, * dsm_pin_mapping() should be used from each process that needs to * retain the mapping. */ void dsm_pin_segment(dsm_segment *seg) { void *handle = NULL; /* * Bump reference count for this segment in shared memory. This will * ensure that even if there is no session which is attached to this * segment, it will remain until postmaster shutdown or an explicit call * to unpin. */ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); if (dsm_control->item[seg->control_slot].pinned) elog(ERROR, "cannot pin a segment that is already pinned"); if (!is_main_region_dsm_handle(seg->handle)) dsm_impl_pin_segment(seg->handle, seg->impl_private, &handle); dsm_control->item[seg->control_slot].pinned = true; dsm_control->item[seg->control_slot].refcnt++; dsm_control->item[seg->control_slot].impl_private_pm_handle = handle; LWLockRelease(DynamicSharedMemoryControlLock); } /* * Unpin a dynamic shared memory segment that was previously pinned with * dsm_pin_segment. This function should not be called unless dsm_pin_segment * was previously called for this segment. * * The argument is a dsm_handle rather than a dsm_segment in case you want * to unpin a segment to which you haven't attached. This turns out to be * useful if, for example, a reference to one shared memory segment is stored * within another shared memory segment. You might want to unpin the * referenced segment before destroying the referencing segment. */ void dsm_unpin_segment(dsm_handle handle) { uint32 control_slot = INVALID_CONTROL_SLOT; bool destroy = false; uint32 i; /* Find the control slot for the given handle. */ LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); for (i = 0; i < dsm_control->nitems; ++i) { /* Skip unused slots and segments that are concurrently going away. */ if (dsm_control->item[i].refcnt <= 1) continue; /* If we've found our handle, we can stop searching. */ if (dsm_control->item[i].handle == handle) { control_slot = i; break; } } /* * We should definitely have found the slot, and it should not already be * in the process of going away, because this function should only be * called on a segment which is pinned. */ if (control_slot == INVALID_CONTROL_SLOT) elog(ERROR, "cannot unpin unknown segment handle"); if (!dsm_control->item[control_slot].pinned) elog(ERROR, "cannot unpin a segment that is not pinned"); Assert(dsm_control->item[control_slot].refcnt > 1); /* * Allow implementation-specific code to run. We have to do this before * releasing the lock, because impl_private_pm_handle may get modified by * dsm_impl_unpin_segment. */ if (!is_main_region_dsm_handle(handle)) dsm_impl_unpin_segment(handle, &dsm_control->item[control_slot].impl_private_pm_handle); /* Note that 1 means no references (0 means unused slot). */ if (--dsm_control->item[control_slot].refcnt == 1) destroy = true; dsm_control->item[control_slot].pinned = false; /* Now we can release the lock. */ LWLockRelease(DynamicSharedMemoryControlLock); /* Clean up resources if that was the last reference. */ if (destroy) { void *junk_impl_private = NULL; void *junk_mapped_address = NULL; Size junk_mapped_size = 0; /* * For an explanation of how error handling works in this case, see * comments in dsm_detach. Note that if we reach this point, the * current process certainly does not have the segment mapped, because * if it did, the reference count would have still been greater than 1 * even after releasing the reference count held by the pin. The fact * that there can't be a dsm_segment for this handle makes it OK to * pass the mapped size, mapped address, and private data as NULL * here. */ if (is_main_region_dsm_handle(handle) || dsm_impl_op(DSM_OP_DESTROY, handle, 0, &junk_impl_private, &junk_mapped_address, &junk_mapped_size, WARNING)) { LWLockAcquire(DynamicSharedMemoryControlLock, LW_EXCLUSIVE); if (is_main_region_dsm_handle(handle)) FreePageManagerPut((FreePageManager *) dsm_main_space_begin, dsm_control->item[control_slot].first_page, dsm_control->item[control_slot].npages); Assert(dsm_control->item[control_slot].handle == handle); Assert(dsm_control->item[control_slot].refcnt == 1); dsm_control->item[control_slot].refcnt = 0; LWLockRelease(DynamicSharedMemoryControlLock); } } } /* * Find an existing mapping for a shared memory segment, if there is one. */ dsm_segment * dsm_find_mapping(dsm_handle handle) { dlist_iter iter; dsm_segment *seg; dlist_foreach(iter, &dsm_segment_list) { seg = dlist_container(dsm_segment, node, iter.cur); if (seg->handle == handle) return seg; } return NULL; } /* * Get the address at which a dynamic shared memory segment is mapped. */ void * dsm_segment_address(dsm_segment *seg) { Assert(seg->mapped_address != NULL); return seg->mapped_address; } /* * Get the size of a mapping. */ Size dsm_segment_map_length(dsm_segment *seg) { Assert(seg->mapped_address != NULL); return seg->mapped_size; } /* * Get a handle for a mapping. * * To establish communication via dynamic shared memory between two backends, * one of them should first call dsm_create() to establish a new shared * memory mapping. That process should then call dsm_segment_handle() to * obtain a handle for the mapping, and pass that handle to the * coordinating backend via some means (e.g. bgw_main_arg, or via the * main shared memory segment). The recipient, once in possession of the * handle, should call dsm_attach(). */ dsm_handle dsm_segment_handle(dsm_segment *seg) { return seg->handle; } /* * Register an on-detach callback for a dynamic shared memory segment. */ void on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg) { dsm_segment_detach_callback *cb; cb = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment_detach_callback)); cb->function = function; cb->arg = arg; slist_push_head(&seg->on_detach, &cb->node); } /* * Unregister an on-detach callback for a dynamic shared memory segment. */ void cancel_on_dsm_detach(dsm_segment *seg, on_dsm_detach_callback function, Datum arg) { slist_mutable_iter iter; slist_foreach_modify(iter, &seg->on_detach) { dsm_segment_detach_callback *cb; cb = slist_container(dsm_segment_detach_callback, node, iter.cur); if (cb->function == function && cb->arg == arg) { slist_delete_current(&iter); pfree(cb); break; } } } /* * Discard all registered on-detach callbacks without executing them. */ void reset_on_dsm_detach(void) { dlist_iter iter; dlist_foreach(iter, &dsm_segment_list) { dsm_segment *seg = dlist_container(dsm_segment, node, iter.cur); /* Throw away explicit on-detach actions one by one. */ while (!slist_is_empty(&seg->on_detach)) { slist_node *node; dsm_segment_detach_callback *cb; node = slist_pop_head_node(&seg->on_detach); cb = slist_container(dsm_segment_detach_callback, node, node); pfree(cb); } /* * Decrementing the reference count is a sort of implicit on-detach * action; make sure we don't do that, either. */ seg->control_slot = INVALID_CONTROL_SLOT; } } /* * Create a segment descriptor. */ static dsm_segment * dsm_create_descriptor(void) { dsm_segment *seg; if (CurrentResourceOwner) ResourceOwnerEnlarge(CurrentResourceOwner); seg = MemoryContextAlloc(TopMemoryContext, sizeof(dsm_segment)); dlist_push_head(&dsm_segment_list, &seg->node); /* seg->handle must be initialized by the caller */ seg->control_slot = INVALID_CONTROL_SLOT; seg->impl_private = NULL; seg->mapped_address = NULL; seg->mapped_size = 0; seg->resowner = CurrentResourceOwner; if (CurrentResourceOwner) ResourceOwnerRememberDSM(CurrentResourceOwner, seg); slist_init(&seg->on_detach); return seg; } /* * Sanity check a control segment. * * The goal here isn't to detect everything that could possibly be wrong with * the control segment; there's not enough information for that. Rather, the * goal is to make sure that someone can iterate over the items in the segment * without overrunning the end of the mapping and crashing. We also check * the magic number since, if that's messed up, this may not even be one of * our segments at all. */ static bool dsm_control_segment_sane(dsm_control_header *control, Size mapped_size) { if (mapped_size < offsetof(dsm_control_header, item)) return false; /* Mapped size too short to read header. */ if (control->magic != PG_DYNSHMEM_CONTROL_MAGIC) return false; /* Magic number doesn't match. */ if (dsm_control_bytes_needed(control->maxitems) > mapped_size) return false; /* Max item count won't fit in map. */ if (control->nitems > control->maxitems) return false; /* Overfull. */ return true; } /* * Compute the number of control-segment bytes needed to store a given * number of items. */ static uint64 dsm_control_bytes_needed(uint32 nitems) { return offsetof(dsm_control_header, item) + sizeof(dsm_control_item) * (uint64) nitems; } static inline dsm_handle make_main_region_dsm_handle(int slot) { dsm_handle handle; /* * We need to create a handle that doesn't collide with any existing extra * segment created by dsm_impl_op(), so we'll make it odd. It also * mustn't collide with any other main area pseudo-segment, so we'll * include the slot number in some of the bits. We also want to make an * effort to avoid newly created and recently destroyed handles from being * confused, so we'll make the rest of the bits random. */ handle = 1; handle |= slot << 1; handle |= pg_prng_uint32(&pg_global_prng_state) << (pg_leftmost_one_pos32(dsm_control->maxitems) + 1); return handle; } static inline bool is_main_region_dsm_handle(dsm_handle handle) { return handle & 1; } /* ResourceOwner callbacks */ static void ResOwnerReleaseDSM(Datum res) { dsm_segment *seg = (dsm_segment *) DatumGetPointer(res); seg->resowner = NULL; dsm_detach(seg); } static char * ResOwnerPrintDSM(Datum res) { dsm_segment *seg = (dsm_segment *) DatumGetPointer(res); return psprintf("dynamic shared memory segment %u", dsm_segment_handle(seg)); }