Use MemoryContext API for regex memory management.

Previously, regex_t objects' memory was managed with malloc() and free()
directly.  Switch to palloc()-based memory management instead.
Advantages:

 * memory used by cached regexes is now visible with MemoryContext
   observability tools

 * cleanup can be done automatically in certain failure modes
   (something that later commits will take advantage of)

 * cleanup can be done in bulk

On the downside, there may be more fragmentation (wasted memory) due to
per-regex MemoryContext objects.  This is a problem shared with other
cached objects in PostgreSQL and can probably be improved with later
tuning.

Thanks to Noah Misch for suggesting this general approach, which
unblocks later work on interrupts.

Suggested-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
This commit is contained in:
Thomas Munro 2023-04-08 21:52:35 +12:00
parent fcd77d5321
commit bea3d7e383
3 changed files with 46 additions and 21 deletions

View File

@ -32,7 +32,7 @@ static int findprefix(struct cnfa *cnfa, struct colormap *cm,
* REG_EXACT: all strings satisfying the regex must match the same string
* or a REG_XXX error code
*
* In the non-failure cases, *string is set to a malloc'd string containing
* In the non-failure cases, *string is set to a palloc'd string containing
* the common prefix or exact value, of length *slength (measured in chrs
* not bytes!).
*

View File

@ -96,9 +96,13 @@ typedef struct regexp_matches_ctx
#define MAX_CACHED_RES 32
#endif
/* A parent memory context for regular expressions. */
static MemoryContext RegexpCacheMemoryContext;
/* this structure describes one cached regular expression */
typedef struct cached_re_str
{
MemoryContext cre_context; /* memory context for this regexp */
char *cre_pat; /* original RE (not null terminated!) */
int cre_pat_len; /* length of original RE, in bytes */
int cre_flags; /* compile flags: extended,icase etc */
@ -145,6 +149,7 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
int regcomp_result;
cached_re_str re_temp;
char errMsg[100];
MemoryContext oldcontext;
/*
* Look for a match among previously compiled REs. Since the data
@ -172,6 +177,13 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
}
}
/* Set up the cache memory on first go through. */
if (unlikely(RegexpCacheMemoryContext == NULL))
RegexpCacheMemoryContext =
AllocSetContextCreate(TopMemoryContext,
"RegexpCacheMemoryContext",
ALLOCSET_SMALL_SIZES);
/*
* Couldn't find it, so try to compile the new RE. To avoid leaking
* resources on failure, we build into the re_temp local.
@ -183,6 +195,18 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
pattern,
text_re_len);
/*
* Make a memory context for this compiled regexp. This is initially a
* child of the current memory context, so it will be cleaned up
* automatically if compilation is interrupted and throws an ERROR. We'll
* re-parent it under the longer lived cache context if we make it to the
* bottom of this function.
*/
re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext,
"RegexpMemoryContext",
ALLOCSET_SMALL_SIZES);
oldcontext = MemoryContextSwitchTo(re_temp.cre_context);
regcomp_result = pg_regcomp(&re_temp.cre_re,
pattern,
pattern_len,
@ -209,21 +233,17 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
errmsg("invalid regular expression: %s", errMsg)));
}
/*
* We use malloc/free for the cre_pat field because the storage has to
* persist across transactions, and because we want to get control back on
* out-of-memory. The Max() is because some malloc implementations return
* NULL for malloc(0).
*/
re_temp.cre_pat = malloc(Max(text_re_len, 1));
if (re_temp.cre_pat == NULL)
{
pg_regfree(&re_temp.cre_re);
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
/* Copy the pattern into the per-regexp memory context. */
re_temp.cre_pat = palloc(text_re_len + 1);
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
/*
* NUL-terminate it only for the benefit of the identifier used for the
* memory context, visible in the pg_backend_memory_contexts view.
*/
re_temp.cre_pat[text_re_len] = 0;
MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat);
re_temp.cre_pat_len = text_re_len;
re_temp.cre_flags = cflags;
re_temp.cre_collation = collation;
@ -236,16 +256,21 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
{
--num_res;
Assert(num_res < MAX_CACHED_RES);
pg_regfree(&re_array[num_res].cre_re);
free(re_array[num_res].cre_pat);
/* Delete the memory context holding the regexp and pattern. */
MemoryContextDelete(re_array[num_res].cre_context);
}
/* Re-parent the memory context to our long-lived cache context. */
MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext);
if (num_res > 0)
memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
re_array[0] = re_temp;
num_res++;
MemoryContextSwitchTo(oldcontext);
return &re_array[0].cre_re;
}
@ -1990,7 +2015,7 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
slen = pg_wchar2mb_with_len(str, result, slen);
Assert(slen < maxlen);
free(str);
pfree(str);
return result;
}

View File

@ -49,9 +49,9 @@
/* overrides for regguts.h definitions, if any */
#define FUNCPTR(name, args) (*name) args
#define MALLOC(n) malloc(n)
#define FREE(p) free(VS(p))
#define REALLOC(p,n) realloc(VS(p),n)
#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM)
#define FREE(p) pfree(VS(p))
#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM)
#define assert(x) Assert(x)
/* internal character type and related */