pg_upgrade: Allow use of file cloning
Add another transfer mode --clone to pg_upgrade (besides the existing --link and the default copy), using special file cloning calls. This makes the file transfer faster and more space efficient, achieving speed similar to --link mode without the associated drawbacks. On Linux, file cloning is supported on Btrfs and XFS (if formatted with reflink support). On macOS, file cloning is supported on APFS. Reviewed-by: Michael Paquier <michael@paquier.xyz>
This commit is contained in:
parent
5f32b29c18
commit
3a769d8239
|
@ -15130,7 +15130,7 @@ fi
|
||||||
LIBS_including_readline="$LIBS"
|
LIBS_including_readline="$LIBS"
|
||||||
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
||||||
|
|
||||||
for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
|
for ac_func in cbrt clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
|
||||||
do :
|
do :
|
||||||
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||||
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
|
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
|
||||||
|
|
|
@ -1602,6 +1602,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
||||||
AC_CHECK_FUNCS(m4_normalize([
|
AC_CHECK_FUNCS(m4_normalize([
|
||||||
cbrt
|
cbrt
|
||||||
clock_gettime
|
clock_gettime
|
||||||
|
copyfile
|
||||||
fdatasync
|
fdatasync
|
||||||
getifaddrs
|
getifaddrs
|
||||||
getpeerucred
|
getpeerucred
|
||||||
|
|
|
@ -182,6 +182,28 @@
|
||||||
<listitem><para>display version information, then exit</para></listitem>
|
<listitem><para>display version information, then exit</para></listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><option>--clone</option></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Use efficient file cloning (also known as <quote>reflinks</quote> on
|
||||||
|
some systems) instead of copying files to the new cluster. This can
|
||||||
|
result in near-instantaneous copying of the data files, giving the
|
||||||
|
speed advantages of <option>-k</option>/<option>--link</option> while
|
||||||
|
leaving the old cluster untouched.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
File cloning is only supported on some operating systems and file
|
||||||
|
systems. If it is selected but not supported, the
|
||||||
|
<application>pg_upgrade</application> run will fail with an error.  At present, it
|
||||||
|
is supported on Linux (kernel 4.5 or later) with Btrfs and XFS (on
|
||||||
|
file systems created with reflink support, which is not the default
|
||||||
|
for XFS at this writing), and on macOS with APFS.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><option>-?</option></term>
|
<term><option>-?</option></term>
|
||||||
<term><option>--help</option></term>
|
<term><option>--help</option></term>
|
||||||
|
@ -340,7 +362,7 @@ NET STOP postgresql-&majorversion;
|
||||||
Always run the <application>pg_upgrade</application> binary of the new server, not the old one.
|
Always run the <application>pg_upgrade</application> binary of the new server, not the old one.
|
||||||
<application>pg_upgrade</application> requires the specification of the old and new cluster's
|
<application>pg_upgrade</application> requires the specification of the old and new cluster's
|
||||||
data and executable (<filename>bin</filename>) directories. You can also specify
|
data and executable (<filename>bin</filename>) directories. You can also specify
|
||||||
user and port values, and whether you want the data files linked
|
user and port values, and whether you want the data files linked or cloned
|
||||||
instead of the default copy behavior.
|
instead of the default copy behavior.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
@ -351,8 +373,12 @@ NET STOP postgresql-&majorversion;
|
||||||
once you start the new cluster after the upgrade. Link mode also
|
once you start the new cluster after the upgrade. Link mode also
|
||||||
requires that the old and new cluster data directories be in the
|
requires that the old and new cluster data directories be in the
|
||||||
same file system. (Tablespaces and <filename>pg_wal</filename> can be on
|
same file system. (Tablespaces and <filename>pg_wal</filename> can be on
|
||||||
different file systems.) See <literal>pg_upgrade --help</literal> for a full
|
different file systems.)
|
||||||
list of options.
|
The clone mode provides the same speed and disk space advantages but will
|
||||||
|
not leave the old cluster unusable after the upgrade. The clone mode
|
||||||
|
also requires that the old and new data directories be in the same file
|
||||||
|
system. The clone mode is only available on certain operating systems
|
||||||
|
and file systems.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
@ -388,8 +414,9 @@ pg_upgrade.exe
|
||||||
to perform only the checks, even if the old server is still
|
to perform only the checks, even if the old server is still
|
||||||
running. <command>pg_upgrade --check</command> will also outline any
|
running. <command>pg_upgrade --check</command> will also outline any
|
||||||
manual adjustments you will need to make after the upgrade. If you
|
manual adjustments you will need to make after the upgrade. If you
|
||||||
are going to be using link mode, you should use the <option>--link</option>
|
are going to be using link or clone mode, you should use the option
|
||||||
option with <option>--check</option> to enable link-mode-specific checks.
|
<option>--link</option> or <option>--clone</option> with
|
||||||
|
<option>--check</option> to enable mode-specific checks.
|
||||||
<command>pg_upgrade</command> requires write permission in the current directory.
|
<command>pg_upgrade</command> requires write permission in the current directory.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
@ -722,7 +749,8 @@ psql --username=postgres --file=script.sql postgres
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
If you want to use link mode and you do not want your old cluster
|
If you want to use link mode and you do not want your old cluster
|
||||||
to be modified when the new cluster is started, make a copy of the
|
to be modified when the new cluster is started, consider using the clone mode.
|
||||||
|
If that is not available, make a copy of the
|
||||||
old cluster and upgrade that in link mode. To make a valid copy
|
old cluster and upgrade that in link mode. To make a valid copy
|
||||||
of the old cluster, use <command>rsync</command> to create a dirty
|
of the old cluster, use <command>rsync</command> to create a dirty
|
||||||
copy of the old cluster while the server is running, then shut down
|
copy of the old cluster while the server is running, then shut down
|
||||||
|
|
|
@ -149,8 +149,17 @@ check_new_cluster(void)
|
||||||
|
|
||||||
check_loadable_libraries();
|
check_loadable_libraries();
|
||||||
|
|
||||||
if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
|
switch (user_opts.transfer_mode)
|
||||||
|
{
|
||||||
|
case TRANSFER_MODE_CLONE:
|
||||||
|
check_file_clone();
|
||||||
|
break;
|
||||||
|
case TRANSFER_MODE_COPY:
|
||||||
|
break;
|
||||||
|
case TRANSFER_MODE_LINK:
|
||||||
check_hard_link();
|
check_hard_link();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
check_is_install_user(&new_cluster);
|
check_is_install_user(&new_cluster);
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,13 @@
|
||||||
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
#ifdef HAVE_COPYFILE
|
||||||
|
#include <copyfile.h>
|
||||||
|
#endif
|
||||||
|
#ifdef __linux__
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <linux/fs.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
|
@ -25,6 +32,47 @@ static int win32_pghardlink(const char *src, const char *dst);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* cloneFile()
|
||||||
|
*
|
||||||
|
* Clones/reflinks a relation file from src to dst.
|
||||||
|
*
|
||||||
|
* schemaName/relName are relation's SQL name (used for error messages only).
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
cloneFile(const char *src, const char *dst,
|
||||||
|
const char *schemaName, const char *relName)
|
||||||
|
{
|
||||||
|
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
|
||||||
|
if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
|
||||||
|
pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
|
||||||
|
schemaName, relName, src, dst, strerror(errno));
|
||||||
|
#elif defined(__linux__) && defined(FICLONE)
|
||||||
|
int src_fd;
|
||||||
|
int dest_fd;
|
||||||
|
|
||||||
|
if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
|
||||||
|
pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
|
||||||
|
schemaName, relName, src, strerror(errno));
|
||||||
|
|
||||||
|
if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
|
||||||
|
pg_file_create_mode)) < 0)
|
||||||
|
pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
|
||||||
|
schemaName, relName, dst, strerror(errno));
|
||||||
|
|
||||||
|
if (ioctl(dest_fd, FICLONE, src_fd) < 0)
|
||||||
|
{
|
||||||
|
unlink(dst);
|
||||||
|
pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
|
||||||
|
schemaName, relName, src, dst, strerror(errno));
|
||||||
|
}
|
||||||
|
|
||||||
|
close(src_fd);
|
||||||
|
close(dest_fd);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* copyFile()
|
* copyFile()
|
||||||
*
|
*
|
||||||
|
@ -270,6 +318,48 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
|
||||||
close(src_fd);
|
close(src_fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
check_file_clone(void)
|
||||||
|
{
|
||||||
|
char existing_file[MAXPGPATH];
|
||||||
|
char new_link_file[MAXPGPATH];
|
||||||
|
|
||||||
|
snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
|
||||||
|
snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
|
||||||
|
unlink(new_link_file); /* might fail */
|
||||||
|
|
||||||
|
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
|
||||||
|
if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
|
||||||
|
pg_fatal("could not clone file between old and new data directories: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
#elif defined(__linux__) && defined(FICLONE)
|
||||||
|
{
|
||||||
|
int src_fd;
|
||||||
|
int dest_fd;
|
||||||
|
|
||||||
|
if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
|
||||||
|
pg_fatal("could not open file \"%s\": %s\n",
|
||||||
|
existing_file, strerror(errno));
|
||||||
|
|
||||||
|
if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
|
||||||
|
pg_file_create_mode)) < 0)
|
||||||
|
pg_fatal("could not create file \"%s\": %s\n",
|
||||||
|
new_link_file, strerror(errno));
|
||||||
|
|
||||||
|
if (ioctl(dest_fd, FICLONE, src_fd) < 0)
|
||||||
|
pg_fatal("could not clone file between old and new data directories: %s\n",
|
||||||
|
strerror(errno));
|
||||||
|
|
||||||
|
close(src_fd);
|
||||||
|
close(dest_fd);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
pg_fatal("file cloning not supported on this platform\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
unlink(new_link_file);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
check_hard_link(void)
|
check_hard_link(void)
|
||||||
{
|
{
|
||||||
|
|
|
@ -53,6 +53,8 @@ parseCommandLine(int argc, char *argv[])
|
||||||
{"retain", no_argument, NULL, 'r'},
|
{"retain", no_argument, NULL, 'r'},
|
||||||
{"jobs", required_argument, NULL, 'j'},
|
{"jobs", required_argument, NULL, 'j'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
|
{"clone", no_argument, NULL, 1},
|
||||||
|
|
||||||
{NULL, 0, NULL, 0}
|
{NULL, 0, NULL, 0}
|
||||||
};
|
};
|
||||||
int option; /* Command line option */
|
int option; /* Command line option */
|
||||||
|
@ -203,6 +205,10 @@ parseCommandLine(int argc, char *argv[])
|
||||||
log_opts.verbose = true;
|
log_opts.verbose = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 1:
|
||||||
|
user_opts.transfer_mode = TRANSFER_MODE_CLONE;
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
pg_fatal("Try \"%s --help\" for more information.\n",
|
pg_fatal("Try \"%s --help\" for more information.\n",
|
||||||
os_info.progname);
|
os_info.progname);
|
||||||
|
@ -293,6 +299,7 @@ usage(void)
|
||||||
printf(_(" -U, --username=NAME cluster superuser (default \"%s\")\n"), os_info.user);
|
printf(_(" -U, --username=NAME cluster superuser (default \"%s\")\n"), os_info.user);
|
||||||
printf(_(" -v, --verbose enable verbose internal logging\n"));
|
printf(_(" -v, --verbose enable verbose internal logging\n"));
|
||||||
printf(_(" -V, --version display version information, then exit\n"));
|
printf(_(" -V, --version display version information, then exit\n"));
|
||||||
|
printf(_(" --clone clone instead of copying files to new cluster\n"));
|
||||||
printf(_(" -?, --help show this help, then exit\n"));
|
printf(_(" -?, --help show this help, then exit\n"));
|
||||||
printf(_("\n"
|
printf(_("\n"
|
||||||
"Before running pg_upgrade you must:\n"
|
"Before running pg_upgrade you must:\n"
|
||||||
|
|
|
@ -230,10 +230,11 @@ typedef struct
|
||||||
} ControlData;
|
} ControlData;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enumeration to denote link modes
|
* Enumeration to denote transfer modes
|
||||||
*/
|
*/
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
|
TRANSFER_MODE_CLONE,
|
||||||
TRANSFER_MODE_COPY,
|
TRANSFER_MODE_COPY,
|
||||||
TRANSFER_MODE_LINK
|
TRANSFER_MODE_LINK
|
||||||
} transferMode;
|
} transferMode;
|
||||||
|
@ -372,12 +373,15 @@ bool pid_lock_file_exists(const char *datadir);
|
||||||
|
|
||||||
/* file.c */
|
/* file.c */
|
||||||
|
|
||||||
|
void cloneFile(const char *src, const char *dst,
|
||||||
|
const char *schemaName, const char *relName);
|
||||||
void copyFile(const char *src, const char *dst,
|
void copyFile(const char *src, const char *dst,
|
||||||
const char *schemaName, const char *relName);
|
const char *schemaName, const char *relName);
|
||||||
void linkFile(const char *src, const char *dst,
|
void linkFile(const char *src, const char *dst,
|
||||||
const char *schemaName, const char *relName);
|
const char *schemaName, const char *relName);
|
||||||
void rewriteVisibilityMap(const char *fromfile, const char *tofile,
|
void rewriteVisibilityMap(const char *fromfile, const char *tofile,
|
||||||
const char *schemaName, const char *relName);
|
const char *schemaName, const char *relName);
|
||||||
|
void check_file_clone(void);
|
||||||
void check_hard_link(void);
|
void check_hard_link(void);
|
||||||
|
|
||||||
/* fopen_priv() is no longer different from fopen() */
|
/* fopen_priv() is no longer different from fopen() */
|
||||||
|
|
|
@ -30,10 +30,18 @@ void
|
||||||
transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
|
transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
|
||||||
char *old_pgdata, char *new_pgdata)
|
char *old_pgdata, char *new_pgdata)
|
||||||
{
|
{
|
||||||
if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
|
switch (user_opts.transfer_mode)
|
||||||
pg_log(PG_REPORT, "Linking user relation files\n");
|
{
|
||||||
else
|
case TRANSFER_MODE_CLONE:
|
||||||
|
pg_log(PG_REPORT, "Cloning user relation files\n");
|
||||||
|
break;
|
||||||
|
case TRANSFER_MODE_COPY:
|
||||||
pg_log(PG_REPORT, "Copying user relation files\n");
|
pg_log(PG_REPORT, "Copying user relation files\n");
|
||||||
|
break;
|
||||||
|
case TRANSFER_MODE_LINK:
|
||||||
|
pg_log(PG_REPORT, "Linking user relation files\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Transferring files by tablespace is tricky because a single database
|
* Transferring files by tablespace is tricky because a single database
|
||||||
|
@ -250,14 +258,20 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
|
||||||
old_file, new_file);
|
old_file, new_file);
|
||||||
rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
|
rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
|
||||||
}
|
}
|
||||||
else if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
|
else
|
||||||
|
switch (user_opts.transfer_mode)
|
||||||
{
|
{
|
||||||
|
case TRANSFER_MODE_CLONE:
|
||||||
|
pg_log(PG_VERBOSE, "cloning \"%s\" to \"%s\"\n",
|
||||||
|
old_file, new_file);
|
||||||
|
cloneFile(old_file, new_file, map->nspname, map->relname);
|
||||||
|
break;
|
||||||
|
case TRANSFER_MODE_COPY:
|
||||||
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
|
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
|
||||||
old_file, new_file);
|
old_file, new_file);
|
||||||
copyFile(old_file, new_file, map->nspname, map->relname);
|
copyFile(old_file, new_file, map->nspname, map->relname);
|
||||||
}
|
break;
|
||||||
else
|
case TRANSFER_MODE_LINK:
|
||||||
{
|
|
||||||
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
|
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
|
||||||
old_file, new_file);
|
old_file, new_file);
|
||||||
linkFile(old_file, new_file, map->nspname, map->relname);
|
linkFile(old_file, new_file, map->nspname, map->relname);
|
||||||
|
|
|
@ -114,6 +114,9 @@
|
||||||
/* Define to 1 if your compiler handles computed gotos. */
|
/* Define to 1 if your compiler handles computed gotos. */
|
||||||
#undef HAVE_COMPUTED_GOTO
|
#undef HAVE_COMPUTED_GOTO
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `copyfile' function. */
|
||||||
|
#undef HAVE_COPYFILE
|
||||||
|
|
||||||
/* Define to 1 if you have the <crtdefs.h> header file. */
|
/* Define to 1 if you have the <crtdefs.h> header file. */
|
||||||
#undef HAVE_CRTDEFS_H
|
#undef HAVE_CRTDEFS_H
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue