Refactor regular expression handling in hba.c

AuthToken gains a regular expression, and IdentLine is changed so as it
uses an AuthToken rather than tracking separately the ident user string
used for the regex compilation and its generated regex_t.  In the case
of pg_ident.conf, a set of AuthTokens is built in the pre-parsing phase
of the file, and an extra regular expression is compiled when building
the list of IdentLines, after checking the sanity of the fields in a
pre-parsed entry.

The logic in charge of computing and executing regular expressions is
now done in a new set of routines called respectively
regcomp_auth_token() and regexec_auth_token() that are wrappers around
pg_regcomp() and pg_regexec(), working on AuthTokens.  While on it, this
patch adds a routine able to free an AuthToken, free_auth_token(), to
simplify a bit the logic around the requirement of using a specific free
routine for computed regular expressions.  Note that there are no
functional or behavior changes introduced by this commit.

The goal of this patch is to ease the use of regular expressions with
more items of pg_hba.conf (user list, database list, potentially
hostnames) where AuthTokens are used extensively.  This will be tackled
later in a separate patch.

Author: Bertrand Drouvot, Michael Paquier
Discussion: https://postgr.es/m/fff0d7c1-8ad4-76a1-9db3-0ab6ec338bf7@amazon.com
This commit is contained in:
Michael Paquier 2022-10-19 10:08:49 +09:00
parent 8bf66dedd8
commit fc579e11c6
3 changed files with 119 additions and 71 deletions

View File

@ -66,6 +66,7 @@ typedef struct check_network_data
} check_network_data;
#define token_has_regexp(t) (t->regex != NULL)
#define token_is_keyword(t, k) (!t->quoted && strcmp(t->string, k) == 0)
#define token_matches(t, k) (strcmp(t->string, k) == 0)
@ -80,9 +81,10 @@ static MemoryContext parsed_hba_context = NULL;
* pre-parsed content of ident mapping file: list of IdentLine structs.
* parsed_ident_context is the memory context where it lives.
*
* NOTE: the IdentLine structs can contain pre-compiled regular expressions
* that live outside the memory context. Before destroying or resetting the
* memory context, they need to be explicitly free'd.
* NOTE: the IdentLine structs can contain AuthTokens with pre-compiled
* regular expressions that live outside the memory context. Before
* destroying or resetting the memory context, they need to be explicitly
* free'd.
*/
static List *parsed_ident_lines = NIL;
static MemoryContext parsed_ident_context = NULL;
@ -117,6 +119,9 @@ static List *tokenize_inc_file(List *tokens, const char *outer_filename,
const char *inc_filename, int elevel, char **err_msg);
static bool parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline,
int elevel, char **err_msg);
static int regcomp_auth_token(AuthToken *token);
static int regexec_auth_token(const char *match, AuthToken *token,
size_t nmatch, regmatch_t pmatch[]);
/*
@ -267,14 +272,26 @@ make_auth_token(const char *token, bool quoted)
toklen = strlen(token);
/* we copy string into same palloc block as the struct */
authtoken = (AuthToken *) palloc(sizeof(AuthToken) + toklen + 1);
authtoken = (AuthToken *) palloc0(sizeof(AuthToken) + toklen + 1);
authtoken->string = (char *) authtoken + sizeof(AuthToken);
authtoken->quoted = quoted;
authtoken->regex = NULL;
memcpy(authtoken->string, token, toklen + 1);
return authtoken;
}
/*
* Free an AuthToken, that may include a regular expression that needs
* to be cleaned up explicitly.
*/
static void
free_auth_token(AuthToken *token)
{
if (token_has_regexp(token))
pg_regfree(token->regex);
}
/*
* Copy a AuthToken struct into freshly palloc'd memory.
*/
@ -286,6 +303,56 @@ copy_auth_token(AuthToken *in)
return out;
}
/*
* Compile the regular expression and store it in the AuthToken given in
* input. Returns the result of pg_regcomp().
*/
static int
regcomp_auth_token(AuthToken *token)
{
pg_wchar *wstr;
int wlen;
int rc;
Assert(token->regex == NULL);
if (token->string[0] != '/')
return 0; /* nothing to compile */
token->regex = (regex_t *) palloc0(sizeof(regex_t));
wstr = palloc((strlen(token->string + 1) + 1) * sizeof(pg_wchar));
wlen = pg_mb2wchar_with_len(token->string + 1,
wstr, strlen(token->string + 1));
rc = pg_regcomp(token->regex, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
pfree(wstr);
return rc;
}
/*
* Execute a regular expression computed in an AuthToken, checking for a match
* with the string specified in "match". The caller may optionally give an
* array to store the matches. Returns the result of pg_regexec().
*/
static int
regexec_auth_token(const char *match, AuthToken *token, size_t nmatch,
regmatch_t pmatch[])
{
pg_wchar *wmatchstr;
int wmatchlen;
int r;
Assert(token->string[0] == '/' && token->regex);
wmatchstr = palloc((strlen(match) + 1) * sizeof(pg_wchar));
wmatchlen = pg_mb2wchar_with_len(match, wmatchstr, strlen(match));
r = pg_regexec(token->regex, wmatchstr, wmatchlen, 0, NULL, nmatch, pmatch, 0);
pfree(wmatchstr);
return r;
}
/*
* Tokenize one HBA field from a line, handling file inclusion and comma lists.
@ -2307,6 +2374,7 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
List *tokens;
AuthToken *token;
IdentLine *parsedline;
int rc;
Assert(tok_line->fields != NIL);
field = list_head(tok_line->fields);
@ -2326,7 +2394,9 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
tokens = lfirst(field);
IDENT_MULTI_VALUE(tokens);
token = linitial(tokens);
parsedline->ident_user = pstrdup(token->string);
/* Copy the ident user token */
parsedline->token = copy_auth_token(token);
/* Get the PG rolename token */
field = lnext(tok_line->fields, field);
@ -2336,40 +2406,27 @@ parse_ident_line(TokenizedAuthLine *tok_line, int elevel)
token = linitial(tokens);
parsedline->pg_role = pstrdup(token->string);
if (parsedline->ident_user[0] == '/')
/*
* Now that the field validation is done, compile a regex from the user
* token, if necessary.
*/
rc = regcomp_auth_token(parsedline->token);
if (rc)
{
/*
* When system username starts with a slash, treat it as a regular
* expression. Pre-compile it.
*/
int r;
pg_wchar *wstr;
int wlen;
char errstr[100];
wstr = palloc((strlen(parsedline->ident_user + 1) + 1) * sizeof(pg_wchar));
wlen = pg_mb2wchar_with_len(parsedline->ident_user + 1,
wstr, strlen(parsedline->ident_user + 1));
r = pg_regcomp(&parsedline->re, wstr, wlen, REG_ADVANCED, C_COLLATION_OID);
if (r)
{
char errstr[100];
pg_regerror(r, &parsedline->re, errstr, sizeof(errstr));
ereport(elevel,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression \"%s\": %s",
parsedline->ident_user + 1, errstr),
errcontext("line %d of configuration file \"%s\"",
pg_regerror(rc, parsedline->token->regex, errstr, sizeof(errstr));
ereport(elevel,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression \"%s\": %s",
parsedline->token->string + 1, errstr),
errcontext("line %d of configuration file \"%s\"",
line_num, IdentFileName)));
*err_msg = psprintf("invalid regular expression \"%s\": %s",
parsedline->ident_user + 1, errstr);
*err_msg = psprintf("invalid regular expression \"%s\": %s",
parsedline->token->string + 1, errstr);
pfree(wstr);
return NULL;
}
pfree(wstr);
return NULL;
}
return parsedline;
@ -2394,25 +2451,19 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
return;
/* Match? */
if (identLine->ident_user[0] == '/')
if (token_has_regexp(identLine->token))
{
/*
* When system username starts with a slash, treat it as a regular
* expression. In this case, we process the system username as a
* regular expression that returns exactly one match. This is replaced
* for \1 in the database username string, if present.
* Process the system username as a regular expression that returns
* exactly one match. This is replaced for \1 in the database username
* string, if present.
*/
int r;
regmatch_t matches[2];
pg_wchar *wstr;
int wlen;
char *ofs;
char *regexp_pgrole;
wstr = palloc((strlen(ident_user) + 1) * sizeof(pg_wchar));
wlen = pg_mb2wchar_with_len(ident_user, wstr, strlen(ident_user));
r = pg_regexec(&identLine->re, wstr, wlen, 0, NULL, 2, matches, 0);
r = regexec_auth_token(ident_user, identLine->token, 2, matches);
if (r)
{
char errstr[100];
@ -2420,18 +2471,15 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
if (r != REG_NOMATCH)
{
/* REG_NOMATCH is not an error, everything else is */
pg_regerror(r, &identLine->re, errstr, sizeof(errstr));
pg_regerror(r, identLine->token->regex, errstr, sizeof(errstr));
ereport(LOG,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression match for \"%s\" failed: %s",
identLine->ident_user + 1, errstr)));
identLine->token->string + 1, errstr)));
*error_p = true;
}
pfree(wstr);
return;
}
pfree(wstr);
if ((ofs = strstr(identLine->pg_role, "\\1")) != NULL)
{
@ -2443,7 +2491,7 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
ereport(LOG,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression \"%s\" has no subexpressions as requested by backreference in \"%s\"",
identLine->ident_user + 1, identLine->pg_role)));
identLine->token->string + 1, identLine->pg_role)));
*error_p = true;
return;
}
@ -2490,13 +2538,13 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
if (case_insensitive)
{
if (pg_strcasecmp(identLine->pg_role, pg_role) == 0 &&
pg_strcasecmp(identLine->ident_user, ident_user) == 0)
pg_strcasecmp(identLine->token->string, ident_user) == 0)
*found_p = true;
}
else
{
if (strcmp(identLine->pg_role, pg_role) == 0 &&
strcmp(identLine->ident_user, ident_user) == 0)
strcmp(identLine->token->string, ident_user) == 0)
*found_p = true;
}
}
@ -2646,8 +2694,7 @@ load_ident(void)
foreach(parsed_line_cell, new_parsed_lines)
{
newline = (IdentLine *) lfirst(parsed_line_cell);
if (newline->ident_user[0] == '/')
pg_regfree(&newline->re);
free_auth_token(newline->token);
}
MemoryContextDelete(ident_context);
return false;
@ -2659,8 +2706,7 @@ load_ident(void)
foreach(parsed_line_cell, parsed_ident_lines)
{
newline = (IdentLine *) lfirst(parsed_line_cell);
if (newline->ident_user[0] == '/')
pg_regfree(&newline->re);
free_auth_token(newline->token);
}
}
if (parsed_ident_context != NULL)

View File

@ -467,7 +467,7 @@ fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
if (ident != NULL)
{
values[index++] = CStringGetTextDatum(ident->usermap);
values[index++] = CStringGetTextDatum(ident->ident_user);
values[index++] = CStringGetTextDatum(ident->token->string);
values[index++] = CStringGetTextDatum(ident->pg_role);
}
else

View File

@ -77,6 +77,20 @@ typedef enum ClientCertName
clientCertDN
} ClientCertName;
/*
* A single string token lexed from an authentication configuration file
* (pg_ident.conf or pg_hba.conf), together with whether the token has
* been quoted. If "string" begins with a slash, it may optionally
* contain a regular expression (currently used for pg_ident.conf when
* building IdentLines).
*/
typedef struct AuthToken
{
char *string;
bool quoted;
regex_t *regex;
} AuthToken;
typedef struct HbaLine
{
int linenumber;
@ -127,22 +141,10 @@ typedef struct IdentLine
int linenumber;
char *usermap;
char *ident_user;
char *pg_role;
regex_t re;
AuthToken *token;
} IdentLine;
/*
* A single string token lexed from an authentication configuration file
* (pg_ident.conf or pg_hba.conf), together with whether the token has
* been quoted.
*/
typedef struct AuthToken
{
char *string;
bool quoted;
} AuthToken;
/*
* TokenizedAuthLine represents one line lexed from an authentication
* configuration file. Each item in the "fields" list is a sub-list of