From 2cdf131c46e631addfc386f6106e52a1b8cc3a70 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 29 Nov 2023 01:41:48 +0200 Subject: [PATCH] Use larger segment file names for pg_notify This avoids the wraparound in async.c and removes the corresponding code complexity. The maximum amount of allocated SLRU pages for NOTIFY / LISTEN queue is now determined by the max_notify_queue_pages GUC. The default value is 1048576. It allows to consume up to 8 GB of disk space which is exactly the limit we had previously. Author: Maxim Orlov, Aleksander Alekseev, Alexander Korotkov, Teodor Sigaev Author: Nikita Glukhov, Pavel Borisov, Yura Sokolov Reviewed-by: Jacob Champion, Heikki Linnakangas, Alexander Korotkov Reviewed-by: Japin Li, Pavel Borisov, Tom Lane, Peter Eisentraut, Andres Freund Reviewed-by: Andrey Borodin, Dilip Kumar, Aleksander Alekseev Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com --- doc/src/sgml/config.sgml | 16 +++ doc/src/sgml/ref/listen.sgml | 1 + doc/src/sgml/ref/notify.sgml | 1 + src/backend/commands/async.c | 122 +++++------------- src/backend/utils/misc/guc_tables.c | 10 ++ src/backend/utils/misc/postgresql.conf.sample | 3 + src/include/commands/async.h | 1 + 7 files changed, 62 insertions(+), 92 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 93735e3aea..94d1eb2b81 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2151,6 +2151,22 @@ include_dir 'conf.d' + + max_notify_queue_pages (integer) + + max_notify_queue_pages configuration parameter + + + + + Specifies the maximum amount of allocated pages for + / queue. + The default value is 1048576. For 8 KB pages it allows to consume + up to 8 GB of disk space. + + + + diff --git a/doc/src/sgml/ref/listen.sgml b/doc/src/sgml/ref/listen.sgml index 2fab9d65a1..6c1f09bd45 100644 --- a/doc/src/sgml/ref/listen.sgml +++ b/doc/src/sgml/ref/listen.sgml @@ -148,6 +148,7 @@ Asynchronous notification "virtual" received from server process with PID 8448. + diff --git a/doc/src/sgml/ref/notify.sgml b/doc/src/sgml/ref/notify.sgml index d7dcbea02d..fd6ed54e8f 100644 --- a/doc/src/sgml/ref/notify.sgml +++ b/doc/src/sgml/ref/notify.sgml @@ -228,6 +228,7 @@ Asynchronous notification "foo" with payload "payload" received from server proc + diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 346bc28a36..2651d8904b 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -103,12 +103,11 @@ * until we reach either a notification from an uncommitted transaction or * the head pointer's position. * - * 6. To avoid SLRU wraparound and limit disk space consumption, the tail - * pointer needs to be advanced so that old pages can be truncated. - * This is relatively expensive (notably, it requires an exclusive lock), - * so we don't want to do it often. We make sending backends do this work - * if they advanced the queue head into a new page, but only once every - * QUEUE_CLEANUP_DELAY pages. + * 6. To limit disk space consumption, the tail pointer needs to be advanced + * so that old pages can be truncated. This is relatively expensive + * (notably, it requires an exclusive lock), so we don't want to do it + * often. We make sending backends do this work if they advanced the queue + * head into a new page, but only once every QUEUE_CLEANUP_DELAY pages. * * An application that listens on the same channel it notifies will get * NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful, @@ -120,7 +119,7 @@ * The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS) * can be varied without affecting anything but performance. The maximum * amount of notification data that can be queued at one time is determined - * by slru.c's wraparound limit; see QUEUE_MAX_PAGE below. + * by max_notify_queue_pages GUC. *------------------------------------------------------------------------- */ @@ -312,23 +311,8 @@ static SlruCtlData NotifyCtlData; #define NotifyCtl (&NotifyCtlData) #define QUEUE_PAGESIZE BLCKSZ -#define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */ -/* - * Use segments 0000 through FFFF. Each contains SLRU_PAGES_PER_SEGMENT pages - * which gives us the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1. - * We could use as many segments as SlruScanDirectory() allows, but this gives - * us so much space already that it doesn't seem worth the trouble. - * - * The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2 - * pages, because more than that would confuse slru.c into thinking there - * was a wraparound condition. With the default BLCKSZ this means there - * can be up to 8GB of queued-and-not-read data. - * - * Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of - * SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour. - */ -#define QUEUE_MAX_PAGE (SLRU_PAGES_PER_SEGMENT * 0x10000 - 1) +#define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */ /* * listenChannels identifies the channels we are actually listening to @@ -439,12 +423,15 @@ static bool amRegisteredListener = false; /* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY? */ static bool tryAdvanceTail = false; -/* GUC parameter */ +/* GUC parameters */ bool Trace_notify = false; +/* For 8 KB pages this gives 8 GB of disk space */ +int max_notify_queue_pages = 1048576; + /* local function prototypes */ -static int64 asyncQueuePageDiff(int64 p, int64 q); -static bool asyncQueuePagePrecedes(int64 p, int64 q); +static inline int64 asyncQueuePageDiff(int64 p, int64 q); +static inline bool asyncQueuePagePrecedes(int64 p, int64 q); static void queue_listen(ListenActionKind action, const char *channel); static void Async_UnlistenOnExit(int code, Datum arg); static void Exec_ListenPreCommit(void); @@ -474,39 +461,23 @@ static int notification_match(const void *key1, const void *key2, Size keysize); static void ClearPendingActionsAndNotifies(void); /* - * Compute the difference between two queue page numbers (i.e., p - q), - * accounting for wraparound. + * Compute the difference between two queue page numbers. + * Previously this function accounted for a wraparound. */ -static int64 +static inline int64 asyncQueuePageDiff(int64 p, int64 q) { - int64 diff; - - /* - * We have to compare modulo (QUEUE_MAX_PAGE+1)/2. Both inputs should be - * in the range 0..QUEUE_MAX_PAGE. - */ - Assert(p >= 0 && p <= QUEUE_MAX_PAGE); - Assert(q >= 0 && q <= QUEUE_MAX_PAGE); - - diff = p - q; - if (diff >= ((QUEUE_MAX_PAGE + 1) / 2)) - diff -= QUEUE_MAX_PAGE + 1; - else if (diff < -((QUEUE_MAX_PAGE + 1) / 2)) - diff += QUEUE_MAX_PAGE + 1; - return diff; + return p - q; } /* - * Is p < q, accounting for wraparound? - * - * Since asyncQueueIsFull() blocks creation of a page that could precede any - * extant page, we need not assess entries within a page. + * Determines whether p precedes q. + * Previously this function accounted for a wraparound. */ -static bool +static inline bool asyncQueuePagePrecedes(int64 p, int64 q) { - return asyncQueuePageDiff(p, q) < 0; + return p < q; } /* @@ -566,12 +537,13 @@ AsyncShmemInit(void) } /* - * Set up SLRU management of the pg_notify data. + * Set up SLRU management of the pg_notify data. Note that long segment + * names are used in order to avoid wraparound. */ NotifyCtl->PagePrecedes = asyncQueuePagePrecedes; SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0, NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER, - SYNC_HANDLER_NONE, false); + SYNC_HANDLER_NONE, true); if (!found) { @@ -1305,27 +1277,11 @@ asyncQueueUnregister(void) static bool asyncQueueIsFull(void) { - int nexthead; - int boundary; + int headPage = QUEUE_POS_PAGE(QUEUE_HEAD); + int tailPage = QUEUE_POS_PAGE(QUEUE_TAIL); + int occupied = headPage - tailPage; - /* - * The queue is full if creating a new head page would create a page that - * logically precedes the current global tail pointer, ie, the head - * pointer would wrap around compared to the tail. We cannot create such - * a head page for fear of confusing slru.c. For safety we round the tail - * pointer back to a segment boundary (truncation logic in - * asyncQueueAdvanceTail does not do this, so doing it here is optional). - * - * Note that this test is *not* dependent on how much space there is on - * the current head page. This is necessary because asyncQueueAddEntries - * might try to create the next head page in any case. - */ - nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1; - if (nexthead > QUEUE_MAX_PAGE) - nexthead = 0; /* wrap around */ - boundary = QUEUE_STOP_PAGE; - boundary -= boundary % SLRU_PAGES_PER_SEGMENT; - return asyncQueuePagePrecedes(nexthead, boundary); + return occupied >= max_notify_queue_pages; } /* @@ -1355,8 +1311,6 @@ asyncQueueAdvance(volatile QueuePosition *position, int entryLength) if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE) { pageno++; - if (pageno > QUEUE_MAX_PAGE) - pageno = 0; /* wrap around */ offset = 0; pageJump = true; } @@ -1433,9 +1387,6 @@ asyncQueueAddEntries(ListCell *nextNotify) * If this is the first write since the postmaster started, we need to * initialize the first page of the async SLRU. Otherwise, the current * page should be initialized already, so just fetch it. - * - * (We could also take the first path when the SLRU position has just - * wrapped around, but re-zeroing the page is harmless in that case.) */ pageno = QUEUE_POS_PAGE(queue_head); if (QUEUE_POS_IS_ZERO(queue_head)) @@ -1548,20 +1499,12 @@ asyncQueueUsage(void) { int headPage = QUEUE_POS_PAGE(QUEUE_HEAD); int tailPage = QUEUE_POS_PAGE(QUEUE_TAIL); - int occupied; - - occupied = headPage - tailPage; + int occupied = headPage - tailPage; if (occupied == 0) return (double) 0; /* fast exit for common case */ - if (occupied < 0) - { - /* head has wrapped around, tail not yet */ - occupied += QUEUE_MAX_PAGE + 1; - } - - return (double) occupied / (double) ((QUEUE_MAX_PAGE + 1) / 2); + return (double) occupied / (double) max_notify_queue_pages; } /* @@ -2209,11 +2152,6 @@ asyncQueueAdvanceTail(void) */ SimpleLruTruncate(NotifyCtl, newtailpage); - /* - * Update QUEUE_STOP_PAGE. This changes asyncQueueIsFull()'s verdict - * for the segment immediately prior to the old tail, allowing fresh - * data into that segment. - */ LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE); QUEUE_STOP_PAGE = newtailpage; LWLockRelease(NotifyQueueLock); diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index b764ef6998..5c6f5af873 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2687,6 +2687,16 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"max_notify_queue_pages", PGC_POSTMASTER, RESOURCES_DISK, + gettext_noop("Sets the maximum number of allocated pages for NOTIFY / LISTEN queue."), + NULL, + }, + &max_notify_queue_pages, + 1048576, 64, INT_MAX, + NULL, NULL, NULL + }, + { {"wal_decode_buffer_size", PGC_POSTMASTER, WAL_RECOVERY, gettext_noop("Buffer size for reading ahead in the WAL during recovery."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e48c066a5b..cf9f283cfe 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -166,6 +166,9 @@ #temp_file_limit = -1 # limits per-process temp file space # in kilobytes, or -1 for no limit +#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated + # for NOTIFY / LISTEN queue + # - Kernel Resources - #max_files_per_process = 1000 # min 64 diff --git a/src/include/commands/async.h b/src/include/commands/async.h index 02da6ba7e1..a44472b352 100644 --- a/src/include/commands/async.h +++ b/src/include/commands/async.h @@ -21,6 +21,7 @@ #define NUM_NOTIFY_BUFFERS 8 extern PGDLLIMPORT bool Trace_notify; +extern PGDLLIMPORT int max_notify_queue_pages; extern PGDLLIMPORT volatile sig_atomic_t notifyInterruptPending; extern Size AsyncShmemSize(void);