postgresql/src/bin/pg_dump/filter.c

472 lines
11 KiB
C

/*-------------------------------------------------------------------------
*
* filter.c
* Implementation of simple filter file parser
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/bin/pg_dump/filter.c
*
*-------------------------------------------------------------------------
*/
#include "postgres_fe.h"
#include "common/fe_memutils.h"
#include "common/logging.h"
#include "common/string.h"
#include "filter.h"
#include "lib/stringinfo.h"
#include "pqexpbuffer.h"
/*
 * is_keyword_str - compare a keyword literal against a length-delimited token
 *
 * Evaluates to true when the NUL-terminated string "literal" is exactly
 * "len" bytes long and pg_strncasecmp() reports it equal to the first "len"
 * bytes of "token".  "token" does not have to be NUL-terminated.  Note that
 * "literal" and "len" are each evaluated twice.
 */
#define is_keyword_str(literal, token, len) \
	(strlen(literal) == (len) && \
	 pg_strncasecmp((literal), (token), (len)) == 0)
/*
* The following routines are called from pg_dump, pg_dumpall and pg_restore.
* Since the implementation of exit_nicely is application specific, each
* application needs to pass a pointer to its own exit_nicely function, which
* is then used for exiting on errors.
*/
/*
 * filter_init - prepare a filter state for reading
 *
 * Stores the file name and the caller's exit callback in fstate, resets the
 * line counter, initializes the line buffer and selects the input stream.
 * The file name "-" selects stdin; any other name is opened for reading.
 * When the file cannot be opened, an error is logged and f_exit(1) is called.
 */
void
filter_init(FilterStateData *fstate, const char *filename, exit_function f_exit)
{
	fstate->filename = filename;
	fstate->lineno = 0;
	fstate->exit_nicely = f_exit;
	initStringInfo(&fstate->linebuff);

	/* "-" means the filter is read from standard input */
	if (strcmp(filename, "-") == 0)
	{
		fstate->fp = stdin;
		return;
	}

	fstate->fp = fopen(filename, "r");
	if (fstate->fp == NULL)
	{
		pg_log_error("could not open filter file \"%s\": %m", filename);
		fstate->exit_nicely(1);
	}
}
/*
 * filter_free - release the resources held by a filter state
 *
 * Frees the line buffer and closes the input stream unless it is stdin.
 * A failure to close the file is logged but is not fatal.  Passing NULL is
 * a no-op.
 */
void
filter_free(FilterStateData *fstate)
{
	FILE	   *stream;

	if (fstate == NULL)
		return;

	free(fstate->linebuff.data);
	fstate->linebuff.data = NULL;

	/* Nothing to close when no stream was opened or when reading stdin */
	stream = fstate->fp;
	if (stream == NULL || stream == stdin)
		return;

	if (fclose(stream) != 0)
		pg_log_error("could not close filter file \"%s\": %m", fstate->filename);

	fstate->fp = NULL;
}
/*
 * filter_object_type_name - human readable name of a FilterObjectType
 *
 * Maps each enum value to a constant string, mainly for use in error
 * messages.  The switch deliberately has no default branch.
 */
const char *
filter_object_type_name(FilterObjectType fot)
{
	const char *label = NULL;

	switch (fot)
	{
		case FILTER_OBJECT_TYPE_NONE:
			label = "comment or empty line";
			break;
		case FILTER_OBJECT_TYPE_TABLE_DATA:
			label = "table data";
			break;
		case FILTER_OBJECT_TYPE_TABLE_DATA_AND_CHILDREN:
			label = "table data and children";
			break;
		case FILTER_OBJECT_TYPE_DATABASE:
			label = "database";
			break;
		case FILTER_OBJECT_TYPE_EXTENSION:
			label = "extension";
			break;
		case FILTER_OBJECT_TYPE_FOREIGN_DATA:
			label = "foreign data";
			break;
		case FILTER_OBJECT_TYPE_FUNCTION:
			label = "function";
			break;
		case FILTER_OBJECT_TYPE_INDEX:
			label = "index";
			break;
		case FILTER_OBJECT_TYPE_SCHEMA:
			label = "schema";
			break;
		case FILTER_OBJECT_TYPE_TABLE:
			label = "table";
			break;
		case FILTER_OBJECT_TYPE_TABLE_AND_CHILDREN:
			label = "table and children";
			break;
		case FILTER_OBJECT_TYPE_TRIGGER:
			label = "trigger";
			break;
	}

	if (label != NULL)
		return label;

	/* should never get here */
	pg_unreachable();
}
/*
 * get_object_type - translate an object type keyword to its enum value
 *
 * "keyword" points at a token of "size" bytes (not necessarily
 * NUL-terminated).  When the token matches one of the supported object type
 * names, *objtype is set accordingly and true is returned.  Otherwise false
 * is returned and *objtype is left untouched.
 */
static bool
get_object_type(const char *keyword, int size, FilterObjectType *objtype)
{
	static const struct
	{
		const char *name;
		FilterObjectType type;
	}			known_types[] =
	{
		{"table_data", FILTER_OBJECT_TYPE_TABLE_DATA},
		{"table_data_and_children", FILTER_OBJECT_TYPE_TABLE_DATA_AND_CHILDREN},
		{"database", FILTER_OBJECT_TYPE_DATABASE},
		{"extension", FILTER_OBJECT_TYPE_EXTENSION},
		{"foreign_data", FILTER_OBJECT_TYPE_FOREIGN_DATA},
		{"function", FILTER_OBJECT_TYPE_FUNCTION},
		{"index", FILTER_OBJECT_TYPE_INDEX},
		{"schema", FILTER_OBJECT_TYPE_SCHEMA},
		{"table", FILTER_OBJECT_TYPE_TABLE},
		{"table_and_children", FILTER_OBJECT_TYPE_TABLE_AND_CHILDREN},
		{"trigger", FILTER_OBJECT_TYPE_TRIGGER}
	};
	const int	ntypes = (int) (sizeof(known_types) / sizeof(known_types[0]));
	int			i;

	for (i = 0; i < ntypes; i++)
	{
		if (is_keyword_str(known_types[i].name, keyword, size))
		{
			*objtype = known_types[i].type;
			return true;
		}
	}

	return false;
}
/*
 * pg_log_filter_error - report a format error found in the filter input
 *
 * Formats the printf-style message described by fmt and the following
 * arguments into a 256-byte buffer (longer messages are truncated) and logs
 * it together with the name of the input and the current line number.  The
 * input is reported as "stdin" when the filter is read from standard input.
 * This function only logs; exiting is left to the caller.
 */
void
pg_log_filter_error(FilterStateData *fstate, const char *fmt,...)
{
	char		message[256];
	const char *source;
	va_list		args;

	va_start(args, fmt);
	vsnprintf(message, sizeof(message), fmt, args);
	va_end(args);

	if (fstate->fp == stdin)
		source = "stdin";
	else
		source = fstate->filename;

	pg_log_error("invalid format in filter read from \"%s\" on line %d: %s",
				 source,
				 fstate->lineno,
				 message);
}
/*
 * filter_get_keyword - read the next filter keyword from buffer
 *
 * Skips leading whitespace in *line and then looks for a keyword, i.e. an
 * alphabetic character followed by any number of alphabetic characters or
 * underscores.  The char '_' is allowed, except as the first character.
 *
 * Returns a pointer to the first character of the keyword (which is not
 * NUL-terminated) and stores its length in *size.  Returns NULL with *size
 * set to zero when the buffer is empty or the first non-whitespace character
 * is not alphabetic.  In both cases *line is advanced past whatever was
 * consumed (whitespace and, if found, the keyword).
 */
static const char *
filter_get_keyword(const char **line, int *size)
{
	const char *ptr = *line;
	const char *result = NULL;

	/* Set returned length preemptively in case no keyword is found */
	*size = 0;

	/*
	 * The <ctype.h> classification functions require a value representable
	 * as unsigned char (or EOF); passing a plain char with the high bit set
	 * is undefined behavior where char is signed, hence the casts below.
	 */

	/* Skip initial whitespace */
	while (isspace((unsigned char) *ptr))
		ptr++;

	if (isalpha((unsigned char) *ptr))
	{
		result = ptr++;

		while (isalpha((unsigned char) *ptr) || *ptr == '_')
			ptr++;

		*size = (int) (ptr - result);
	}

	*line = ptr;

	return result;
}
/*
 * read_quoted_string - read quoted possibly multi line string
 *
 * On entry str must point at the opening double quote.  The string is copied
 * to "pattern" including the enclosing quotes.  A doubled double quote inside
 * the string is copied as is (both quotes) and does not end the string.  The
 * escape sequences \n and \\ are translated to a newline and a backslash;
 * a backslash followed by any other character is dropped together with that
 * character.
 *
 * The string can span multiple lines: when the end of the current line
 * buffer is reached, the next line is read from the filter file, a newline
 * is appended to "pattern" and the line counter is advanced.
 *
 * Returns a pointer to the next char after the ending double quote.  Note
 * that the returned pointer refers to fstate->linebuff, whose content may
 * have been replaced while reading.  Exits through fstate->exit_nicely on
 * read errors or when the file ends inside the quoted string.
 */
static const char *
read_quoted_string(FilterStateData *fstate,
				   const char *str,
				   PQExpBuffer pattern)
{
	/* Copy the opening quote and step over it */
	appendPQExpBufferChar(pattern, '"');
	str++;

	while (1)
	{
		/*
		 * We can ignore \r or \n chars because the string is read by
		 * pg_get_line_buf, so these chars should be just trailing chars.
		 */
		if (*str == '\r' || *str == '\n')
		{
			str++;
			continue;
		}

		if (*str == '\0')
		{
			Assert(fstate->linebuff.data);

			if (!pg_get_line_buf(fstate->fp, &fstate->linebuff))
			{
				if (ferror(fstate->fp))
					pg_log_error("could not read from filter file \"%s\": %m",
								 fstate->filename);
				else
					pg_log_filter_error(fstate, _("unexpected end of file"));

				fstate->exit_nicely(1);
			}

			str = fstate->linebuff.data;

			appendPQExpBufferChar(pattern, '\n');
			fstate->lineno++;

			/*
			 * Start over with the fresh line, so that a line consisting only
			 * of line-ending characters is skipped by the check above rather
			 * than being copied in addition to the newline just appended.
			 */
			continue;
		}

		if (*str == '"')
		{
			appendPQExpBufferChar(pattern, '"');
			str++;

			if (*str == '"')
			{
				/* doubled quote: keep both and carry on */
				appendPQExpBufferChar(pattern, '"');
				str++;
			}
			else
				break;
		}
		else if (*str == '\\')
		{
			str++;
			if (*str == 'n')
				appendPQExpBufferChar(pattern, '\n');
			else if (*str == '\\')
				appendPQExpBufferChar(pattern, '\\');

			/*
			 * Consume the escaped character, but never step over the string
			 * terminator: a backslash can be the very last byte of the
			 * buffer when the final line of the file has no newline.
			 */
			if (*str != '\0')
				str++;
		}
		else
			appendPQExpBufferChar(pattern, *str++);
	}

	return str;
}
/*
 * read_pattern - reads an object pattern from input
 *
 * This function will parse any valid identifier (quoted or not, qualified or
 * not), which can also include the full signature for routines.
 * Note that this function takes special care to sanitize the detected
 * identifier (removing extraneous whitespace or other unnecessary
 * characters). This is necessary as most backup/restore filtering functions
 * only recognize identifiers if they are written exactly the same way as
 * they are output by the server.
 *
 * Parsing stops at the end of the buffer or at a '#' found outside of a
 * quoted string.  The sanitized identifier is appended to "pattern".
 *
 * Returns a pointer to next character after the found identifier and exits
 * on error.
 */
static const char *
read_pattern(FilterStateData *fstate, const char *str, PQExpBuffer pattern)
{
	bool		skip_space = true;
	bool		found_space = false;

	/*
	 * Throughout this function the argument of isspace() is cast to unsigned
	 * char: passing a plain char with the high bit set is undefined behavior
	 * where char is signed.
	 */

	/* Skip initial whitespace */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '\0')
	{
		pg_log_filter_error(fstate, _("missing object name pattern"));
		fstate->exit_nicely(1);
	}

	while (*str && *str != '#')
	{
		/* Copy a run of ordinary (non-delimiter) characters */
		while (*str && !isspace((unsigned char) *str) && !strchr("#,.()\"", *str))
		{
			/*
			 * Append space only when it is allowed, and when it was found in
			 * original string.
			 */
			if (!skip_space && found_space)
			{
				appendPQExpBufferChar(pattern, ' ');
				skip_space = true;
			}

			appendPQExpBufferChar(pattern, *str++);
		}

		skip_space = false;

		if (*str == '"')
		{
			if (found_space)
				appendPQExpBufferChar(pattern, ' ');

			str = read_quoted_string(fstate, str, pattern);
		}
		else if (*str == ',')
		{
			/* normalize the list separator to comma + single space */
			appendPQExpBufferStr(pattern, ", ");
			skip_space = true;
			str++;
		}
		else if (*str && strchr(".()", *str))
		{
			/* no space is wanted after a dot or a parenthesis */
			appendPQExpBufferChar(pattern, *str++);
			skip_space = true;
		}

		found_space = false;

		/* skip ending whitespace */
		while (isspace((unsigned char) *str))
		{
			found_space = true;
			str++;
		}
	}

	return str;
}
/*
 * filter_read_item - Read command/type/pattern triplet from a filter file
 *
 * This will parse one filter item from the filter file, and while it is a
 * row based format a pattern may span more than one line due to how object
 * names can be constructed.  The expected format of the filter file is:
 *
 * <command> <object_type> <pattern>
 *
 * command can be "include" or "exclude".
 *
 * Supported object types are described by enum FilterObjectType
 * (see function get_object_type).
 *
 * pattern can be any possibly-quoted and possibly-qualified identifier.  It
 * follows the same rules as other object include and exclude functions so it
 * can also use wildcards.
 *
 * Returns true when one line was read and processed.  For a filter item,
 * *comtype and *objtype describe the item and *objname points to the parsed
 * pattern, stored in a buffer set up by initPQExpBuffer (the buffer is not
 * released here; presumably the caller is expected to free it).  For an
 * empty line or a comment line, *objname is set to NULL, *comtype to
 * FILTER_COMMAND_TYPE_NONE and *objtype to FILTER_OBJECT_TYPE_NONE.  When
 * object name contains \n chars, then more than one line from input file can
 * be processed.  Returns false when the filter file reaches EOF.  In case of
 * error, the function will emit an appropriate error message and exit.
 */
bool
filter_read_item(FilterStateData *fstate,
				 char **objname,
				 FilterCommandType *comtype,
				 FilterObjectType *objtype)
{
	if (pg_get_line_buf(fstate->fp, &fstate->linebuff))
	{
		const char *str = fstate->linebuff.data;
		const char *keyword;
		int			size;
		PQExpBufferData pattern;

		fstate->lineno++;

		/*
		 * Skip initial white spaces.  The cast is required because passing a
		 * plain char with the high bit set to isspace() is undefined
		 * behavior where char is signed.
		 */
		while (isspace((unsigned char) *str))
			str++;

		/*
		 * Skip empty lines or lines where the first non-whitespace character
		 * is a hash indicating a comment.
		 */
		if (*str != '\0' && *str != '#')
		{
			/*
			 * First we expect sequence of two keywords, {include|exclude}
			 * followed by the object type to operate on.
			 */
			keyword = filter_get_keyword(&str, &size);
			if (!keyword)
			{
				pg_log_filter_error(fstate,
									_("no filter command found (expected \"include\" or \"exclude\")"));
				fstate->exit_nicely(1);
			}

			if (is_keyword_str("include", keyword, size))
				*comtype = FILTER_COMMAND_TYPE_INCLUDE;
			else if (is_keyword_str("exclude", keyword, size))
				*comtype = FILTER_COMMAND_TYPE_EXCLUDE;
			else
			{
				pg_log_filter_error(fstate,
									_("invalid filter command (expected \"include\" or \"exclude\")"));
				fstate->exit_nicely(1);
			}

			keyword = filter_get_keyword(&str, &size);
			if (!keyword)
			{
				pg_log_filter_error(fstate, _("missing filter object type"));
				fstate->exit_nicely(1);
			}

			if (!get_object_type(keyword, size, objtype))
			{
				pg_log_filter_error(fstate,
									_("unsupported filter object type: \"%.*s\""), size, keyword);
				fstate->exit_nicely(1);
			}

			/*
			 * The rest of the line is the pattern.  read_pattern consumes
			 * input up to the end of the buffer or a trailing comment, so
			 * the position it returns is not needed here.
			 */
			initPQExpBuffer(&pattern);
			(void) read_pattern(fstate, str, &pattern);
			*objname = pattern.data;
		}
		else
		{
			*objname = NULL;
			*comtype = FILTER_COMMAND_TYPE_NONE;
			*objtype = FILTER_OBJECT_TYPE_NONE;
		}

		return true;
	}

	/* No line was read: tell a read error apart from a plain EOF */
	if (ferror(fstate->fp))
	{
		pg_log_error("could not read from filter file \"%s\": %m", fstate->filename);
		fstate->exit_nicely(1);
	}

	return false;
}