Add --copy-file-range option to pg_upgrade.

The copy_file_range() system call is available on at least Linux and
FreeBSD, and asks the kernel to use efficient ways to copy ranges of a
file.  Options available to the kernel include sharing block ranges
(similar to --clone mode), and pushing down block copies to the storage
layer.

For automated testing, see PG_TEST_PG_UPGRADE_MODE.  (Perhaps in a later
commit we could consider setting this mode for one of the CI targets.)

Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/CA%2BhUKGKe7Hb0-UNih8VD5UNZy5-ojxFb3Pr3xSBBL8qj2M2%3DdQ%40mail.gmail.com
This commit is contained in:
Thomas Munro 2024-03-06 11:39:50 +13:00
parent 2bce0ad67f
commit d93627bcbe
11 changed files with 120 additions and 4 deletions

2
configure vendored
View File

@ -15259,7 +15259,7 @@ fi
LIBS_including_readline="$LIBS" LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'` LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in backtrace_symbols copyfile getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l for ac_func in backtrace_symbols copyfile copy_file_range getifaddrs getpeerucred inet_pton kqueue mbstowcs_l memset_s posix_fallocate ppoll pthread_is_threaded_np setproctitle setproctitle_fast strchrnul strsignal syncfs sync_file_range uselocale wcstombs_l
do : do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"

View File

@ -1749,6 +1749,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS(m4_normalize([ AC_CHECK_FUNCS(m4_normalize([
backtrace_symbols backtrace_symbols
copyfile copyfile
copy_file_range
getifaddrs getifaddrs
getpeerucred getpeerucred
inet_pton inet_pton

View File

@ -263,6 +263,19 @@ PostgreSQL documentation
</listitem> </listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><option>--copy-file-range</option></term>
<listitem>
<para>
Use the <function>copy_file_range</function> system call for efficient
copying. On some file systems this gives results similar to
<option>--clone</option>, sharing physical disk blocks, while on others
it may still copy blocks, but do so via an optimized path. At present,
it is supported on Linux and FreeBSD.
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><option>-?</option></term> <term><option>-?</option></term>
<term><option>--help</option></term> <term><option>--help</option></term>

View File

@ -2420,6 +2420,7 @@ func_checks = [
['backtrace_symbols', {'dependencies': [execinfo_dep]}], ['backtrace_symbols', {'dependencies': [execinfo_dep]}],
['clock_gettime', {'dependencies': [rt_dep], 'define': false}], ['clock_gettime', {'dependencies': [rt_dep], 'define': false}],
['copyfile'], ['copyfile'],
['copy_file_range'],
# gcc/clang's sanitizer helper library provides dlopen but not dlsym, thus # gcc/clang's sanitizer helper library provides dlopen but not dlsym, thus
# when enabling asan the dlopen check doesn't notice that -ldl is actually # when enabling asan the dlopen check doesn't notice that -ldl is actually
# required. Just checking for dlsym() ought to suffice. # required. Just checking for dlsym() ought to suffice.

View File

@ -20,8 +20,8 @@ export oldinstall=...otherversion/ (old version's install base path)
See DETAILS below for more information about creation of the dump. See DETAILS below for more information about creation of the dump.
You can also test the different transfer modes (--copy, --link, You can also test the different transfer modes (--copy, --link,
--clone) by setting the environment variable PG_TEST_PG_UPGRADE_MODE --clone, --copy-file-range) by setting the environment variable
to the respective command-line option, like PG_TEST_PG_UPGRADE_MODE to the respective command-line option, like
make check PG_TEST_PG_UPGRADE_MODE=--link make check PG_TEST_PG_UPGRADE_MODE=--link

View File

@ -235,6 +235,9 @@ check_new_cluster(void)
break; break;
case TRANSFER_MODE_COPY: case TRANSFER_MODE_COPY:
break; break;
case TRANSFER_MODE_COPY_FILE_RANGE:
check_copy_file_range();
break;
case TRANSFER_MODE_LINK: case TRANSFER_MODE_LINK:
check_hard_link(); check_hard_link();
break; break;

View File

@ -10,6 +10,7 @@
#include "postgres_fe.h" #include "postgres_fe.h"
#include <sys/stat.h> #include <sys/stat.h>
#include <limits.h>
#include <fcntl.h> #include <fcntl.h>
#ifdef HAVE_COPYFILE_H #ifdef HAVE_COPYFILE_H
#include <copyfile.h> #include <copyfile.h>
@ -140,6 +141,45 @@ copyFile(const char *src, const char *dst,
} }
/*
 * copyFileByRange()
 *
 * Copies a relation file from src to dst using copy_file_range().
 * schemaName/relName are the relation's SQL name (used for error messages
 * only).
 *
 * dst is opened with O_CREAT | O_EXCL, so it must not exist beforehand.
 * Every failure is reported through pg_fatal().
 *
 * When HAVE_COPY_FILE_RANGE is not defined the body is compiled out and the
 * function does nothing; check_copy_file_range() is what rejects this
 * transfer mode on such builds.
 */
void
copyFileByRange(const char *src, const char *dst,
				const char *schemaName, const char *relName)
{
#ifdef HAVE_COPY_FILE_RANGE
	int			in_fd;
	int			out_fd;
	ssize_t		copied;

	in_fd = open(src, O_RDONLY | PG_BINARY, 0);
	if (in_fd < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s",
				 schemaName, relName, src, strerror(errno));

	out_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
				  pg_file_create_mode);
	if (out_fd < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s",
				 schemaName, relName, dst, strerror(errno));

	/*
	 * NULL offsets ask the kernel to use, and advance, each descriptor's own
	 * file position, so every iteration resumes where the previous one
	 * stopped.  Keep going until a call reports that nothing was copied.
	 */
	for (;;)
	{
		copied = copy_file_range(in_fd, NULL, out_fd, NULL, SSIZE_MAX, 0);

		if (copied < 0)
			pg_fatal("error while copying relation \"%s.%s\": could not copy file range from \"%s\" to \"%s\": %s",
					 schemaName, relName, src, dst, strerror(errno));

		if (copied == 0)
			break;
	}

	close(in_fd);
	close(out_fd);
#endif
}
/* /*
* linkFile() * linkFile()
* *
@ -358,6 +398,44 @@ check_file_clone(void)
unlink(new_link_file); unlink(new_link_file);
} }
/*
 * check_copy_file_range()
 *
 * Probe whether copy_file_range() can be used between the old and new data
 * directories, by copying the old cluster's PG_VERSION file into a scratch
 * file inside the new cluster's data directory.  The scratch file is removed
 * again afterwards.
 *
 * Any failure, as well as a build without copy_file_range(), is reported
 * through pg_fatal().
 */
void
check_copy_file_range(void)
{
	char		src_path[MAXPGPATH];
	char		probe_path[MAXPGPATH];

	snprintf(src_path, sizeof(src_path), "%s/PG_VERSION", old_cluster.pgdata);
	snprintf(probe_path, sizeof(probe_path), "%s/PG_VERSION.copy_file_range_test", new_cluster.pgdata);

	/* Clear out any stale scratch file; this might fail, which is fine. */
	unlink(probe_path);

#if defined(HAVE_COPY_FILE_RANGE)
	{
		int			in_fd;
		int			out_fd;
		ssize_t		rc;

		in_fd = open(src_path, O_RDONLY | PG_BINARY, 0);
		if (in_fd < 0)
			pg_fatal("could not open file \"%s\": %s",
					 src_path, strerror(errno));

		out_fd = open(probe_path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
					  pg_file_create_mode);
		if (out_fd < 0)
			pg_fatal("could not create file \"%s\": %s",
					 probe_path, strerror(errno));

		/* A single call is enough here: only success vs. failure matters. */
		rc = copy_file_range(in_fd, NULL, out_fd, NULL, SSIZE_MAX, 0);
		if (rc < 0)
			pg_fatal("could not copy file range between old and new data directories: %s",
					 strerror(errno));

		close(in_fd);
		close(out_fd);
	}
#else
	pg_fatal("copy_file_range not supported on this platform");
#endif

	unlink(probe_path);
}
void void
check_hard_link(void) check_hard_link(void)
{ {

View File

@ -58,7 +58,8 @@ parseCommandLine(int argc, char *argv[])
{"verbose", no_argument, NULL, 'v'}, {"verbose", no_argument, NULL, 'v'},
{"clone", no_argument, NULL, 1}, {"clone", no_argument, NULL, 1},
{"copy", no_argument, NULL, 2}, {"copy", no_argument, NULL, 2},
{"sync-method", required_argument, NULL, 3}, {"copy-file-range", no_argument, NULL, 3},
{"sync-method", required_argument, NULL, 4},
{NULL, 0, NULL, 0} {NULL, 0, NULL, 0}
}; };
@ -203,6 +204,9 @@ parseCommandLine(int argc, char *argv[])
break; break;
case 3: case 3:
user_opts.transfer_mode = TRANSFER_MODE_COPY_FILE_RANGE;
break;
case 4:
if (!parse_sync_method(optarg, &unused)) if (!parse_sync_method(optarg, &unused))
exit(1); exit(1);
user_opts.sync_method = pg_strdup(optarg); user_opts.sync_method = pg_strdup(optarg);
@ -301,6 +305,7 @@ usage(void)
printf(_(" -V, --version display version information, then exit\n")); printf(_(" -V, --version display version information, then exit\n"));
printf(_(" --clone clone instead of copying files to new cluster\n")); printf(_(" --clone clone instead of copying files to new cluster\n"));
printf(_(" --copy copy files to new cluster (default)\n")); printf(_(" --copy copy files to new cluster (default)\n"));
printf(_(" --copy-file-range copy files to new cluster with copy_file_range\n"));
printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
printf(_(" -?, --help show this help, then exit\n")); printf(_(" -?, --help show this help, then exit\n"));
printf(_("\n" printf(_("\n"

View File

@ -256,6 +256,7 @@ typedef enum
{ {
TRANSFER_MODE_CLONE, TRANSFER_MODE_CLONE,
TRANSFER_MODE_COPY, TRANSFER_MODE_COPY,
TRANSFER_MODE_COPY_FILE_RANGE,
TRANSFER_MODE_LINK, TRANSFER_MODE_LINK,
} transferMode; } transferMode;
@ -402,11 +403,14 @@ void cloneFile(const char *src, const char *dst,
const char *schemaName, const char *relName); const char *schemaName, const char *relName);
void copyFile(const char *src, const char *dst, void copyFile(const char *src, const char *dst,
const char *schemaName, const char *relName); const char *schemaName, const char *relName);
void copyFileByRange(const char *src, const char *dst,
const char *schemaName, const char *relName);
void linkFile(const char *src, const char *dst, void linkFile(const char *src, const char *dst,
const char *schemaName, const char *relName); const char *schemaName, const char *relName);
void rewriteVisibilityMap(const char *fromfile, const char *tofile, void rewriteVisibilityMap(const char *fromfile, const char *tofile,
const char *schemaName, const char *relName); const char *schemaName, const char *relName);
void check_file_clone(void); void check_file_clone(void);
void check_copy_file_range(void);
void check_hard_link(void); void check_hard_link(void);
/* fopen_priv() is no longer different from fopen() */ /* fopen_priv() is no longer different from fopen() */

View File

@ -37,6 +37,9 @@ transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
case TRANSFER_MODE_COPY: case TRANSFER_MODE_COPY:
prep_status_progress("Copying user relation files"); prep_status_progress("Copying user relation files");
break; break;
case TRANSFER_MODE_COPY_FILE_RANGE:
prep_status_progress("Copying user relation files with copy_file_range");
break;
case TRANSFER_MODE_LINK: case TRANSFER_MODE_LINK:
prep_status_progress("Linking user relation files"); prep_status_progress("Linking user relation files");
break; break;
@ -250,6 +253,11 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
old_file, new_file); old_file, new_file);
copyFile(old_file, new_file, map->nspname, map->relname); copyFile(old_file, new_file, map->nspname, map->relname);
break; break;
case TRANSFER_MODE_COPY_FILE_RANGE:
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\" with copy_file_range",
old_file, new_file);
copyFileByRange(old_file, new_file, map->nspname, map->relname);
break;
case TRANSFER_MODE_LINK: case TRANSFER_MODE_LINK:
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"", pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"",
old_file, new_file); old_file, new_file);

View File

@ -78,6 +78,9 @@
/* Define to 1 if you have the <copyfile.h> header file. */ /* Define to 1 if you have the <copyfile.h> header file. */
#undef HAVE_COPYFILE_H #undef HAVE_COPYFILE_H
/* Define to 1 if you have the `copy_file_range' function. */
#undef HAVE_COPY_FILE_RANGE
/* Define to 1 if you have the <crtdefs.h> header file. */ /* Define to 1 if you have the <crtdefs.h> header file. */
#undef HAVE_CRTDEFS_H #undef HAVE_CRTDEFS_H