From bea3d7e3831fa6a1395eadbad7d97cebc7aa8aee Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sat, 8 Apr 2023 21:52:35 +1200 Subject: [PATCH] Use MemoryContext API for regex memory management. Previously, regex_t objects' memory was managed with malloc() and free() directly. Switch to palloc()-based memory management instead. Advantages: * memory used by cached regexes is now visible with MemoryContext observability tools * cleanup can be done automatically in certain failure modes (something that later commits will take advantage of) * cleanup can be done in bulk On the downside, there may be more fragmentation (wasted memory) due to per-regex MemoryContext objects. This is a problem shared with other cached objects in PostgreSQL and can probably be improved with later tuning. Thanks to Noah Misch for suggesting this general approach, which unblocks later work on interrupts. Suggested-by: Noah Misch Reviewed-by: Tom Lane Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com --- src/backend/regex/regprefix.c | 2 +- src/backend/utils/adt/regexp.c | 59 ++++++++++++++++++++++++---------- src/include/regex/regcustom.h | 6 ++-- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c index 221f02da63..c09b2a9778 100644 --- a/src/backend/regex/regprefix.c +++ b/src/backend/regex/regprefix.c @@ -32,7 +32,7 @@ static int findprefix(struct cnfa *cnfa, struct colormap *cm, * REG_EXACT: all strings satisfying the regex must match the same string * or a REG_XXX error code * - * In the non-failure cases, *string is set to a malloc'd string containing + * In the non-failure cases, *string is set to a palloc'd string containing * the common prefix or exact value, of length *slength (measured in chrs * not bytes!). * diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 810dcb85b6..45280902d6 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -96,9 +96,13 @@ typedef struct regexp_matches_ctx #define MAX_CACHED_RES 32 #endif +/* A parent memory context for regular expressions. */ +static MemoryContext RegexpCacheMemoryContext; + /* this structure describes one cached regular expression */ typedef struct cached_re_str { + MemoryContext cre_context; /* memory context for this regexp */ char *cre_pat; /* original RE (not null terminated!) */ int cre_pat_len; /* length of original RE, in bytes */ int cre_flags; /* compile flags: extended,icase etc */ @@ -145,6 +149,7 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) int regcomp_result; cached_re_str re_temp; char errMsg[100]; + MemoryContext oldcontext; /* * Look for a match among previously compiled REs. Since the data @@ -172,6 +177,13 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) } } + /* Set up the cache memory on first go through. */ + if (unlikely(RegexpCacheMemoryContext == NULL)) + RegexpCacheMemoryContext = + AllocSetContextCreate(TopMemoryContext, + "RegexpCacheMemoryContext", + ALLOCSET_SMALL_SIZES); + /* * Couldn't find it, so try to compile the new RE. To avoid leaking * resources on failure, we build into the re_temp local. @@ -183,6 +195,18 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) pattern, text_re_len); + /* + * Make a memory context for this compiled regexp. This is initially a + * child of the current memory context, so it will be cleaned up + * automatically if compilation is interrupted and throws an ERROR. We'll + * re-parent it under the longer lived cache context if we make it to the + * bottom of this function. + */ + re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext, + "RegexpMemoryContext", + ALLOCSET_SMALL_SIZES); + oldcontext = MemoryContextSwitchTo(re_temp.cre_context); + regcomp_result = pg_regcomp(&re_temp.cre_re, pattern, pattern_len, @@ -209,21 +233,17 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) errmsg("invalid regular expression: %s", errMsg))); } - /* - * We use malloc/free for the cre_pat field because the storage has to - * persist across transactions, and because we want to get control back on - * out-of-memory. The Max() is because some malloc implementations return - * NULL for malloc(0). - */ - re_temp.cre_pat = malloc(Max(text_re_len, 1)); - if (re_temp.cre_pat == NULL) - { - pg_regfree(&re_temp.cre_re); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - } + /* Copy the pattern into the per-regexp memory context. */ + re_temp.cre_pat = palloc(text_re_len + 1); memcpy(re_temp.cre_pat, text_re_val, text_re_len); + + /* + * NUL-terminate it only for the benefit of the identifier used for the + * memory context, visible in the pg_backend_memory_contexts view. + */ + re_temp.cre_pat[text_re_len] = 0; + MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat); + re_temp.cre_pat_len = text_re_len; re_temp.cre_flags = cflags; re_temp.cre_collation = collation; @@ -236,16 +256,21 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation) { --num_res; Assert(num_res < MAX_CACHED_RES); - pg_regfree(&re_array[num_res].cre_re); - free(re_array[num_res].cre_pat); + /* Delete the memory context holding the regexp and pattern. */ + MemoryContextDelete(re_array[num_res].cre_context); } + /* Re-parent the memory context to our long-lived cache context. */ + MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext); + if (num_res > 0) memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str)); re_array[0] = re_temp; num_res++; + MemoryContextSwitchTo(oldcontext); + return &re_array[0].cre_re; } @@ -1990,7 +2015,7 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation, slen = pg_wchar2mb_with_len(str, result, slen); Assert(slen < maxlen); - free(str); + pfree(str); return result; } diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h index fc158e1bb7..8f4025128e 100644 --- a/src/include/regex/regcustom.h +++ b/src/include/regex/regcustom.h @@ -49,9 +49,9 @@ /* overrides for regguts.h definitions, if any */ #define FUNCPTR(name, args) (*name) args -#define MALLOC(n) malloc(n) -#define FREE(p) free(VS(p)) -#define REALLOC(p,n) realloc(VS(p),n) +#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM) +#define FREE(p) pfree(VS(p)) +#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM) #define assert(x) Assert(x) /* internal character type and related */