pg_upgrade: Allow use of file cloning

Add another transfer mode --clone to pg_upgrade (besides the existing
--link and the default copy), using special file cloning calls.  This
makes the file transfer faster and more space efficient, achieving
speed similar to --link mode without the associated drawbacks.

On Linux, file cloning is supported on Btrfs and XFS (if formatted with
reflink support).  On macOS, file cloning is supported on APFS.

Reviewed-by: Michael Paquier <michael@paquier.xyz>
This commit is contained in:
Peter Eisentraut 2018-11-07 18:05:54 +01:00
parent 5f32b29c18
commit 3a769d8239
9 changed files with 181 additions and 25 deletions

2
configure vendored
View File

@ -15130,7 +15130,7 @@ fi
LIBS_including_readline="$LIBS"
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
for ac_func in cbrt clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"

View File

@ -1602,6 +1602,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
AC_CHECK_FUNCS(m4_normalize([
cbrt
clock_gettime
copyfile
fdatasync
getifaddrs
getpeerucred

View File

@ -182,6 +182,28 @@
<listitem><para>display version information, then exit</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--clone</option></term>
<listitem>
<para>
Use efficient file cloning (also known as <quote>reflinks</quote> on
some systems) instead of copying files to the new cluster. This can
result in near-instantaneous copying of the data files, giving the
speed advantages of <option>-k</option>/<option>--link</option> while
leaving the old cluster untouched.
</para>
<para>
File cloning is only supported on some operating systems and file
systems. If it is selected but not supported, the
<application>pg_upgrade</application> run will fail with an error. At present, it
is supported on Linux (kernel 4.5 or later) with Btrfs and XFS (on
file systems created with reflink support, which is not the default
for XFS at this writing), and on macOS with APFS.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-?</option></term>
<term><option>--help</option></term>
@ -340,7 +362,7 @@ NET STOP postgresql-&majorversion;
Always run the <application>pg_upgrade</application> binary of the new server, not the old one.
<application>pg_upgrade</application> requires the specification of the old and new cluster's
data and executable (<filename>bin</filename>) directories. You can also specify
user and port values, and whether you want the data files linked
user and port values, and whether you want the data files linked or cloned
instead of the default copy behavior.
</para>
@ -351,8 +373,12 @@ NET STOP postgresql-&majorversion;
once you start the new cluster after the upgrade. Link mode also
requires that the old and new cluster data directories be in the
same file system. (Tablespaces and <filename>pg_wal</filename> can be on
different file systems.) See <literal>pg_upgrade --help</literal> for a full
list of options.
different file systems.)
The clone mode provides the same speed and disk space advantages but will
not leave the old cluster unusable after the upgrade. The clone mode
also requires that the old and new data directories be in the same file
system. The clone mode is only available on certain operating systems
and file systems.
</para>
<para>
@ -388,8 +414,9 @@ pg_upgrade.exe
to perform only the checks, even if the old server is still
running. <command>pg_upgrade --check</command> will also outline any
manual adjustments you will need to make after the upgrade. If you
are going to be using link mode, you should use the <option>--link</option>
option with <option>--check</option> to enable link-mode-specific checks.
are going to be using link or clone mode, you should use the option
<option>--link</option> or <option>--clone</option> with
<option>--check</option> to enable mode-specific checks.
<command>pg_upgrade</command> requires write permission in the current directory.
</para>
@ -722,7 +749,8 @@ psql --username=postgres --file=script.sql postgres
<para>
If you want to use link mode and you do not want your old cluster
to be modified when the new cluster is started, make a copy of the
to be modified when the new cluster is started, consider using the clone mode.
If that is not available, make a copy of the
old cluster and upgrade that in link mode. To make a valid copy
of the old cluster, use <command>rsync</command> to create a dirty
copy of the old cluster while the server is running, then shut down

View File

@ -149,8 +149,17 @@ check_new_cluster(void)
check_loadable_libraries();
if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
switch (user_opts.transfer_mode)
{
case TRANSFER_MODE_CLONE:
check_file_clone();
break;
case TRANSFER_MODE_COPY:
break;
case TRANSFER_MODE_LINK:
check_hard_link();
break;
}
check_is_install_user(&new_cluster);

View File

@ -18,6 +18,13 @@
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_COPYFILE
#include <copyfile.h>
#endif
#ifdef __linux__
#include <sys/ioctl.h>
#include <linux/fs.h>
#endif
#ifdef WIN32
@ -25,6 +32,47 @@ static int win32_pghardlink(const char *src, const char *dst);
#endif
/*
 * cloneFile()
 *
 * Clones/reflinks a relation file from src to dst.
 *
 * When copyfile() and COPYFILE_CLONE_FORCE are available, that call does the
 * whole job.  Otherwise, on Linux with FICLONE defined, dst is created
 * exclusively (it must not already exist) and the FICLONE ioctl is issued on
 * it with src as the source.  Every failure is reported through pg_fatal().
 * If neither facility was compiled in, the function body is empty.
 *
 * schemaName/relName are relation's SQL name (used for error messages only).
 */
void
cloneFile(const char *src, const char *dst,
		  const char *schemaName, const char *relName)
{
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(errno));
#elif defined(__linux__) && defined(FICLONE)
	int			src_fd;
	int			dest_fd;

	if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
				 schemaName, relName, src, strerror(errno));

	if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
						pg_file_create_mode)) < 0)
		pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
				 schemaName, relName, dst, strerror(errno));

	if (ioctl(dest_fd, FICLONE, src_fd) < 0)
	{
		/*
		 * unlink() may overwrite errno, so remember why the ioctl failed
		 * before removing the file we just created.
		 */
		int			save_errno = errno;

		unlink(dst);

		pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
				 schemaName, relName, src, dst, strerror(save_errno));
	}

	close(src_fd);
	close(dest_fd);
#endif
}
/*
* copyFile()
*
@ -270,6 +318,48 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
close(src_fd);
}
/*
 * check_file_clone()
 *
 * Verify that file cloning works between the old and new data directories
 * by cloning the old cluster's PG_VERSION into a scratch file named
 * PG_VERSION.clonetest in the new cluster.  Any failure, including a build
 * without cloning support, is reported through pg_fatal().  The scratch file
 * is removed again once the probe has succeeded.
 */
void
check_file_clone(void)
{
	char		clone_src[MAXPGPATH];
	char		clone_probe[MAXPGPATH];

	snprintf(clone_src, sizeof(clone_src),
			 "%s/PG_VERSION", old_cluster.pgdata);
	snprintf(clone_probe, sizeof(clone_probe),
			 "%s/PG_VERSION.clonetest", new_cluster.pgdata);

	/* Clear out a scratch file left by an earlier run; failure is fine. */
	unlink(clone_probe);

#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
	if (copyfile(clone_src, clone_probe, NULL, COPYFILE_CLONE_FORCE) < 0)
		pg_fatal("could not clone file between old and new data directories: %s\n",
				 strerror(errno));
#elif defined(__linux__) && defined(FICLONE)
	{
		int			in_fd;
		int			out_fd;

		in_fd = open(clone_src, O_RDONLY | PG_BINARY, 0);
		if (in_fd < 0)
			pg_fatal("could not open file \"%s\": %s\n",
					 clone_src, strerror(errno));

		/* O_EXCL: the scratch file must be freshly created by us. */
		out_fd = open(clone_probe, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
					  pg_file_create_mode);
		if (out_fd < 0)
			pg_fatal("could not create file \"%s\": %s\n",
					 clone_probe, strerror(errno));

		if (ioctl(out_fd, FICLONE, in_fd) < 0)
			pg_fatal("could not clone file between old and new data directories: %s\n",
					 strerror(errno));

		close(in_fd);
		close(out_fd);
	}
#else
	pg_fatal("file cloning not supported on this platform\n");
#endif

	unlink(clone_probe);
}
void
check_hard_link(void)
{

View File

@ -53,6 +53,8 @@ parseCommandLine(int argc, char *argv[])
{"retain", no_argument, NULL, 'r'},
{"jobs", required_argument, NULL, 'j'},
{"verbose", no_argument, NULL, 'v'},
{"clone", no_argument, NULL, 1},
{NULL, 0, NULL, 0}
};
int option; /* Command line option */
@ -203,6 +205,10 @@ parseCommandLine(int argc, char *argv[])
log_opts.verbose = true;
break;
case 1:
user_opts.transfer_mode = TRANSFER_MODE_CLONE;
break;
default:
pg_fatal("Try \"%s --help\" for more information.\n",
os_info.progname);
@ -293,6 +299,7 @@ usage(void)
printf(_(" -U, --username=NAME cluster superuser (default \"%s\")\n"), os_info.user);
printf(_(" -v, --verbose enable verbose internal logging\n"));
printf(_(" -V, --version display version information, then exit\n"));
printf(_(" --clone clone instead of copying files to new cluster\n"));
printf(_(" -?, --help show this help, then exit\n"));
printf(_("\n"
"Before running pg_upgrade you must:\n"

View File

@ -230,10 +230,11 @@ typedef struct
} ControlData;
/*
* Enumeration to denote link modes
* Enumeration to denote transfer modes
*/
typedef enum
{
/* clone/reflink each relation file via cloneFile(); selected by --clone */
TRANSFER_MODE_CLONE,
/* copy each relation file via copyFile() */
TRANSFER_MODE_COPY,
/* link each relation file via linkFile() */
TRANSFER_MODE_LINK
} transferMode;
@ -372,12 +373,15 @@ bool pid_lock_file_exists(const char *datadir);
/* file.c */
void cloneFile(const char *src, const char *dst,
const char *schemaName, const char *relName);
void copyFile(const char *src, const char *dst,
const char *schemaName, const char *relName);
void linkFile(const char *src, const char *dst,
const char *schemaName, const char *relName);
void rewriteVisibilityMap(const char *fromfile, const char *tofile,
const char *schemaName, const char *relName);
void check_file_clone(void);
void check_hard_link(void);
/* fopen_priv() is no longer different from fopen() */

View File

@ -30,10 +30,18 @@ void
transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
char *old_pgdata, char *new_pgdata)
{
if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
pg_log(PG_REPORT, "Linking user relation files\n");
else
switch (user_opts.transfer_mode)
{
case TRANSFER_MODE_CLONE:
pg_log(PG_REPORT, "Cloning user relation files\n");
break;
case TRANSFER_MODE_COPY:
pg_log(PG_REPORT, "Copying user relation files\n");
break;
case TRANSFER_MODE_LINK:
pg_log(PG_REPORT, "Linking user relation files\n");
break;
}
/*
* Transferring files by tablespace is tricky because a single database
@ -250,14 +258,20 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
old_file, new_file);
rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
}
else if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
else
switch (user_opts.transfer_mode)
{
case TRANSFER_MODE_CLONE:
pg_log(PG_VERBOSE, "cloning \"%s\" to \"%s\"\n",
old_file, new_file);
cloneFile(old_file, new_file, map->nspname, map->relname);
break;
case TRANSFER_MODE_COPY:
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
old_file, new_file);
copyFile(old_file, new_file, map->nspname, map->relname);
}
else
{
break;
case TRANSFER_MODE_LINK:
pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
old_file, new_file);
linkFile(old_file, new_file, map->nspname, map->relname);

View File

@ -114,6 +114,9 @@
/* Define to 1 if your compiler handles computed gotos. */
#undef HAVE_COMPUTED_GOTO
/* Define to 1 if you have the `copyfile' function. */
#undef HAVE_COPYFILE
/* Define to 1 if you have the <crtdefs.h> header file. */
#undef HAVE_CRTDEFS_H