From c6306db24bd913375f99494e38ab315befe44e11 Mon Sep 17 00:00:00 2001
From: Robert Haas
Date: Tue, 15 Mar 2022 13:24:23 -0400
Subject: [PATCH] Add 'basebackup_to_shell' contrib module.

As a demonstration of the sort of thing that can be done by adding a
custom backup target, this defines a 'shell' target which executes a
command defined by the system administrator. The command is executed
once for each tar archive generated by the backup and once for the
backup manifest, if any. Each time the command is executed, it
receives the contents of the file for which it is executed via
standard input.

The configured command can use %f to refer to the name of the archive
(e.g. base.tar, $TABLESPACE_OID.tar, backup_manifest) and %d to refer
to the target detail (pg_basebackup --target shell:DETAIL). A target
detail is required if %d appears in the configured command and
forbidden if it does not.

Patch by me, reviewed by Abhijit Menon-Sen.

Discussion: http://postgr.es/m/CA+TgmoaqvdT-u3nt+_kkZ7bgDAyqDB0i-+XOMmr5JN2Rd37hxw@mail.gmail.com
---
 contrib/Makefile                              |   1 +
 contrib/basebackup_to_shell/Makefile          |  19 +
 .../basebackup_to_shell/basebackup_to_shell.c | 425 ++++++++++++++++++
 doc/src/sgml/basebackup-to-shell.sgml         |  69 +++
 doc/src/sgml/contrib.sgml                     |   1 +
 doc/src/sgml/filelist.sgml                    |   1 +
 6 files changed, 516 insertions(+)
 create mode 100644 contrib/basebackup_to_shell/Makefile
 create mode 100644 contrib/basebackup_to_shell/basebackup_to_shell.c
 create mode 100644 doc/src/sgml/basebackup-to-shell.sgml

diff --git a/contrib/Makefile b/contrib/Makefile
index e3e221308b..332b486ecc 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -10,6 +10,7 @@ SUBDIRS = \
 		auth_delay	\
 		auto_explain	\
 		basic_archive	\
+		basebackup_to_shell	\
 		bloom		\
 		btree_gin	\
 		btree_gist	\
diff --git a/contrib/basebackup_to_shell/Makefile b/contrib/basebackup_to_shell/Makefile
new file mode 100644
index 0000000000..f31dfaae9c
--- /dev/null
+++ b/contrib/basebackup_to_shell/Makefile
@@ -0,0 +1,19 @@
+# contrib/basebackup_to_shell/Makefile
+
+MODULE_big = basebackup_to_shell
+OBJS = \
+	$(WIN32RES) \
+	basebackup_to_shell.o
+
+PGFILEDESC = "basebackup_to_shell - target basebackup to shell command"
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/basebackup_to_shell
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/basebackup_to_shell/basebackup_to_shell.c b/contrib/basebackup_to_shell/basebackup_to_shell.c
new file mode 100644
index 0000000000..d82cb6d13f
--- /dev/null
+++ b/contrib/basebackup_to_shell/basebackup_to_shell.c
@@ -0,0 +1,425 @@
+/*-------------------------------------------------------------------------
+ *
+ * basebackup_to_shell.c
+ *	  target base backup files to a shell command
+ *
+ * Copyright (c) 2016-2022, PostgreSQL Global Development Group
+ *
+ *	  contrib/basebackup_to_shell/basebackup_to_shell.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "miscadmin.h"
+#include "replication/basebackup_target.h"
+#include "storage/fd.h"
+#include "utils/acl.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+typedef struct bbsink_shell
+{
+	/* Common information for all types of sink. */
+	bbsink		base;
+
+	/* User-supplied target detail string. */
+	char	   *target_detail;
+
+	/* Shell command pattern being used for this backup. */
+	char	   *shell_command;
+
+	/* The command that is currently running. */
+	char	   *current_command;
+
+	/* Pipe to the running command. */
+	FILE	   *pipe;
+} bbsink_shell;
+
+void		_PG_init(void);
+
+static void *shell_check_detail(char *target, char *target_detail);
+static bbsink *shell_get_sink(bbsink *next_sink, void *detail_arg);
+
+static void bbsink_shell_begin_archive(bbsink *sink,
+									   const char *archive_name);
+static void bbsink_shell_archive_contents(bbsink *sink, size_t len);
+static void bbsink_shell_end_archive(bbsink *sink);
+static void bbsink_shell_begin_manifest(bbsink *sink);
+static void bbsink_shell_manifest_contents(bbsink *sink, size_t len);
+static void bbsink_shell_end_manifest(bbsink *sink);
+
+const bbsink_ops bbsink_shell_ops = {
+	.begin_backup = bbsink_forward_begin_backup,
+	.begin_archive = bbsink_shell_begin_archive,
+	.archive_contents = bbsink_shell_archive_contents,
+	.end_archive = bbsink_shell_end_archive,
+	.begin_manifest = bbsink_shell_begin_manifest,
+	.manifest_contents = bbsink_shell_manifest_contents,
+	.end_manifest = bbsink_shell_end_manifest,
+	.end_backup = bbsink_forward_end_backup,
+	.cleanup = bbsink_forward_cleanup
+};
+
+static char *shell_command = "";
+static char *shell_required_role = "";
+
+void
+_PG_init(void)
+{
+	DefineCustomStringVariable("basebackup_to_shell.command",
+							   "Shell command to be executed for each backup file.",
+							   NULL,
+							   &shell_command,
+							   "",
+							   PGC_SIGHUP,
+							   0,
+							   NULL, NULL, NULL);
+
+	DefineCustomStringVariable("basebackup_to_shell.required_role",
+							   "Backup user must be a member of this role to use shell backup target.",
+							   NULL,
+							   &shell_required_role,
+							   "",
+							   PGC_SIGHUP,
+							   0,
+							   NULL, NULL, NULL);
+
+	BaseBackupAddTarget("shell", shell_check_detail, shell_get_sink);
+}
+
+/*
+ * We choose to defer sanity checking until shell_get_sink(), and so
+ * just pass the target detail through without doing anything. However, we do
+ * permissions checks here, before any real work has been done.
+ */
+static void *
+shell_check_detail(char *target, char *target_detail)
+{
+	if (shell_required_role[0] != '\0')
+	{
+		Oid			roleid;
+
+		StartTransactionCommand();
+		roleid = get_role_oid(shell_required_role, true);
+		if (!is_member_of_role(GetUserId(), roleid))
+			ereport(ERROR,
+					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+					 errmsg("permission denied to use basebackup_to_shell")));
+		CommitTransactionCommand();
+	}
+
+	return target_detail;
+}
+
+/*
+ * Set up a bbsink to implement this base backup target.
+ *
+ * This is also a convenient place to sanity check that a target detail was
+ * given if and only if %d is present.
+ */
+static bbsink *
+shell_get_sink(bbsink *next_sink, void *detail_arg)
+{
+	bbsink_shell *sink;
+	bool		has_detail_escape = false;
+	char	   *c;
+
+	/*
+	 * Set up the bbsink.
+	 *
+	 * We remember the current value of basebackup_to_shell.command to
+	 * be certain that it can't change under us during the backup.
+	 */
+	sink = palloc0(sizeof(bbsink_shell));
+	*((const bbsink_ops **) &sink->base.bbs_ops) = &bbsink_shell_ops;
+	sink->base.bbs_next = next_sink;
+	sink->target_detail = detail_arg;
+	sink->shell_command = pstrdup(shell_command);
+
+	/* Reject an empty shell command. */
+	if (sink->shell_command[0] == '\0')
+		ereport(ERROR,
+				errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				errmsg("shell command for backup is not configured"));
+
+	/* Determine whether the shell command we're using contains %d. */
+	for (c = sink->shell_command; *c != '\0'; ++c)
+	{
+		if (c[0] == '%' && c[1] != '\0')
+		{
+			if (c[1] == 'd')
+				has_detail_escape = true;
+			++c;
+		}
+	}
+
+	/* There should be a target detail if %d was used, and not otherwise. */
+	if (has_detail_escape && sink->target_detail == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("a target detail is required because the configured command includes %%d"),
+				 errhint("Try \"pg_basebackup --target shell:DETAIL ...\"")));
+	else if (!has_detail_escape && sink->target_detail != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("a target detail is not permitted because the configured command does not include %%d")));
+
+	/*
+	 * Since we're passing the string provided by the user to popen(), it will
+	 * be interpreted by the shell, which is a potential security
+	 * vulnerability, since the user invoking this module is not necessarily
+	 * a superuser. To stay out of trouble, we must disallow any shell
+	 * metacharacters here; to be conservative and keep things simple, we
+	 * allow only alphanumerics.
+	 */
+	if (sink->target_detail != NULL)
+	{
+		char	   *d;
+		bool		scary = false;
+
+		for (d = sink->target_detail; *d != '\0'; ++d)
+		{
+			if (*d >= 'a' && *d <= 'z')
+				continue;
+			if (*d >= 'A' && *d <= 'Z')
+				continue;
+			if (*d >= '0' && *d <= '9')
+				continue;
+			scary = true;
+			break;
+		}
+
+		if (scary)
+			ereport(ERROR,
+					errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					errmsg("target detail must contain only alphanumeric characters"));
+	}
+
+	return &sink->base;
+}
+
+/*
+ * Construct the exact shell command that we're actually going to run,
+ * making substitutions as appropriate for escape sequences.
+ */
+static char *
+shell_construct_command(char *base_command, const char *filename,
+						char *target_detail)
+{
+	StringInfoData buf;
+	char	   *c;
+
+	initStringInfo(&buf);
+	for (c = base_command; *c != '\0'; ++c)
+	{
+		/* Anything other than '%' is copied verbatim. */
+		if (*c != '%')
+		{
+			appendStringInfoChar(&buf, *c);
+			continue;
+		}
+
+		/* Any time we see '%' we eat the following character as well. */
+		++c;
+
+		/*
+		 * The following character determines what we insert here, or may
+		 * cause us to throw an error.
+		 */
+		if (*c == '%')
+		{
+			/* '%%' is replaced by a single '%' */
+			appendStringInfoChar(&buf, '%');
+		}
+		else if (*c == 'f')
+		{
+			/* '%f' is replaced by the filename */
+			appendStringInfoString(&buf, filename);
+		}
+		else if (*c == 'd')
+		{
+			/* '%d' is replaced by the target detail */
+			appendStringInfoString(&buf, target_detail);
+		}
+		else if (*c == '\0')
+		{
+			/* Incomplete escape sequence, expected a character afterward */
+			ereport(ERROR,
+					errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("shell command ends unexpectedly after escape character \"%%\""));
+		}
+		else
+		{
+			/* Unknown escape sequence */
+			ereport(ERROR,
+					errcode(ERRCODE_SYNTAX_ERROR),
+					errmsg("shell command contains unexpected escape sequence \"%c\"",
+						   *c));
+		}
+	}
+
+	return buf.data;
+}
+
+/*
+ * Finish executing the shell command once all data has been written.
+ */
+static void
+shell_finish_command(bbsink_shell *sink)
+{
+	int			pclose_rc;
+
+	/* There should be a command running. */
+	Assert(sink->current_command != NULL);
+	Assert(sink->pipe != NULL);
+
+	/* Close down the pipe we opened. */
+	pclose_rc = ClosePipeStream(sink->pipe);
+	if (pclose_rc == -1)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close pipe to external command: %m")));
+	else if (pclose_rc != 0)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+				 errmsg("shell command \"%s\" failed",
+						sink->current_command),
+				 errdetail_internal("%s", wait_result_to_str(pclose_rc))));
+	}
+
+	/* Clean up. */
+	sink->pipe = NULL;
+	pfree(sink->current_command);
+	sink->current_command = NULL;
+}
+
+/*
+ * Start up the shell command, substituting %f in for the current filename.
+ * Throws an ERROR if the command cannot be started.
+ */
+static void
+shell_run_command(bbsink_shell *sink, const char *filename)
+{
+	/* There should not be anything already running. */
+	Assert(sink->current_command == NULL);
+	Assert(sink->pipe == NULL);
+
+	/* Construct a suitable command. */
+	sink->current_command = shell_construct_command(sink->shell_command,
+													filename,
+													sink->target_detail);
+
+	/* Run it, and fail cleanly if the pipe could not be opened. */
+	sink->pipe = OpenPipeStream(sink->current_command, PG_BINARY_W);
+	if (sink->pipe == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not execute command \"%s\": %m",
+						sink->current_command)));
+}
+
+/*
+ * Send accumulated data to the running shell command.
+ */
+static void
+shell_send_data(bbsink_shell *sink, size_t len)
+{
+	/* There should be a command running. */
+	Assert(sink->current_command != NULL);
+	Assert(sink->pipe != NULL);
+
+	/* Try to write the data. */
+	if (fwrite(sink->base.bbs_buffer, len, 1, sink->pipe) != 1 ||
+		ferror(sink->pipe))
+	{
+		if (errno == EPIPE)
+		{
+			/*
+			 * The error we're about to throw would shut down the command
+			 * anyway, but we may get a more meaningful error message by
+			 * doing this. If not, we'll fall through to the generic error
+			 * below.
+			 */
+			shell_finish_command(sink);
+			errno = EPIPE;
+		}
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to shell backup program: %m")));
+	}
+}
+
+/*
+ * At start of archive, start up the shell command and forward to next sink.
+ */
+static void
+bbsink_shell_begin_archive(bbsink *sink, const char *archive_name)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_run_command(mysink, archive_name);
+	bbsink_forward_begin_archive(sink, archive_name);
+}
+
+/*
+ * Send archive contents to command's stdin and forward to next sink.
+ */
+static void
+bbsink_shell_archive_contents(bbsink *sink, size_t len)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_send_data(mysink, len);
+	bbsink_forward_archive_contents(sink, len);
+}
+
+/*
+ * At end of archive, shut down the shell command and forward to next sink.
+ */
+static void
+bbsink_shell_end_archive(bbsink *sink)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_finish_command(mysink);
+	bbsink_forward_end_archive(sink);
+}
+
+/*
+ * At start of manifest, start up the shell command and forward to next sink.
+ */
+static void
+bbsink_shell_begin_manifest(bbsink *sink)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_run_command(mysink, "backup_manifest");
+	bbsink_forward_begin_manifest(sink);
+}
+
+/*
+ * Send manifest contents to command's stdin and forward to next sink.
+ */
+static void
+bbsink_shell_manifest_contents(bbsink *sink, size_t len)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_send_data(mysink, len);
+	bbsink_forward_manifest_contents(sink, len);
+}
+
+/*
+ * At end of manifest, shut down the shell command and forward to next sink.
+ */
+static void
+bbsink_shell_end_manifest(bbsink *sink)
+{
+	bbsink_shell *mysink = (bbsink_shell *) sink;
+
+	shell_finish_command(mysink);
+	bbsink_forward_end_manifest(sink);
+}
diff --git a/doc/src/sgml/basebackup-to-shell.sgml b/doc/src/sgml/basebackup-to-shell.sgml
new file mode 100644
index 0000000000..f36f37e510
--- /dev/null
+++ b/doc/src/sgml/basebackup-to-shell.sgml
@@ -0,0 +1,69 @@
+
+
+
+ basebackup_to_shell
+
+
+  basebackup_to_shell
+
+
+
+  basebackup_to_shell adds a custom basebackup target
+  called shell. This makes it possible to run
+  pg_basebackup --target=shell or, depending on how this
+  module is configured,
+  pg_basebackup --target=shell:DETAIL_STRING, and cause
+  a server command chosen by the server administrator to be executed for
+  each tar archive generated by the backup process. The command will receive
+  the contents of the archive via standard input.
+
+
+
+  This module is primarily intended as an example of how to create new
+  backup targets via an extension module, but in some scenarios it may be
+  useful for its own sake.
+  In order to function, this module must be loaded via
+  or
+  .
+
+
+
+  Configuration Parameters
+
+
+
+
+     basebackup_to_shell.command (string)
+
+      basebackup_to_shell.command configuration parameter
+
+
+
+
+      The command which the server should execute for each archive generated
+      by the backup process. If %f occurs in the command
+      string, it will be replaced by the name of the archive (e.g.
+      base.tar). If %d occurs in the
+      command string, it will be replaced by the target detail provided by
+      the user. A target detail is required if %d is
+      used in the command string, and prohibited otherwise. For security
+      reasons, the target detail may contain only alphanumeric characters. If
+      %% occurs in the command string, it will be replaced
+      by a single %. If % occurs in
+      the command string followed by any other character or at the end of the
+      string, an error occurs.
+
+
+
+
+
+
+
+  Author
+
+
+   Robert Haas rhaas@postgresql.org
+
+
+
+
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index be9711c6f2..1e42ce1a7f 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -99,6 +99,7 @@ CREATE EXTENSION module_name;
  &amcheck;
  &auth-delay;
  &auto-explain;
+ &basebackup-to-shell;
  &basic-archive;
  &bloom;
  &btree-gin;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 328cd1f378..fd853af01f 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -114,6 +114,7 @@
+