From ac81101551326ddf4c5cb804c75bd3e8c56506ba Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 5 Apr 2024 19:01:16 +0200 Subject: [PATCH] Allow using copy_file_range in write_reconstructed_file This commit allows using copy_file_range() for efficient combining of data from multiple files, instead of simply reading/writing the blocks. Depending on the filesystem and other factors (size of the increment, distribution of modified blocks etc.) this may be faster than the block-by-block copy, but more importantly it enables various features provided by CoW filesystems. If a checksum needs to be calculated for the file, the same strategy as when copying whole files is used - copy_file_range is used to copy the blocks, but the file is also read for the checksum calculation. While the checksum calculation is rarely needed when cloning whole files, when reconstructing the files from multiple backups it needs to happen almost always (the only exception is when the user specified --no-manifest). 
Author: Tomas Vondra Reviewed-by: Thomas Munro, Jakub Wartak, Robert Haas Discussion: https://postgr.es/m/3024283a-7491-4240-80d0-421575f6bb23%40enterprisedb.com --- src/bin/pg_combinebackup/reconstruct.c | 138 +++++++++++++++++++------ 1 file changed, 108 insertions(+), 30 deletions(-) diff --git a/src/bin/pg_combinebackup/reconstruct.c b/src/bin/pg_combinebackup/reconstruct.c index b083c5ce15..b059be8d0a 100644 --- a/src/bin/pg_combinebackup/reconstruct.c +++ b/src/bin/pg_combinebackup/reconstruct.c @@ -58,9 +58,14 @@ static void write_reconstructed_file(char *input_filename, rfile **sourcemap, off_t *offsetmap, pg_checksum_context *checksum_ctx, + CopyMethod copy_method, bool debug, bool dry_run); static void read_bytes(rfile *rf, void *buffer, unsigned length); +static void write_block(int wfd, char *output_filename, + uint8 *buffer, + pg_checksum_context *checksum_ctx); +static void read_block(rfile *s, off_t off, uint8 *buffer); /* * Reconstruct a full file from an incremental file and a chain of prior @@ -325,7 +330,8 @@ reconstruct_from_incremental_file(char *input_filename, { write_reconstructed_file(input_filename, output_filename, block_length, sourcemap, offsetmap, - &checksum_ctx, debug, dry_run); + &checksum_ctx, copy_method, + debug, dry_run); debug_reconstruction(n_prior_backups + 1, source, dry_run); } @@ -535,6 +541,7 @@ write_reconstructed_file(char *input_filename, rfile **sourcemap, off_t *offsetmap, pg_checksum_context *checksum_ctx, + CopyMethod copy_method, bool debug, bool dry_run) { @@ -622,7 +629,6 @@ write_reconstructed_file(char *input_filename, { uint8 buffer[BLCKSZ]; rfile *s = sourcemap[i]; - int wb; /* Update accounting information. */ if (s == NULL) @@ -646,38 +652,61 @@ write_reconstructed_file(char *input_filename, * uninitialized block, so just zero-fill it. */ memset(buffer, 0, BLCKSZ); - } - else - { - int rb; - /* Read the block from the correct source, except if dry-run. 
*/ - rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[i]); - if (rb != BLCKSZ) + /* Write out the block, update the checksum if needed. */ + write_block(wfd, output_filename, buffer, checksum_ctx); + + /* Nothing else to do for zero-filled blocks. */ + continue; + } + + /* Copy the block using the appropriate copy method. */ + if (copy_method != COPY_METHOD_COPY_FILE_RANGE) + { + /* + * Read the block from the correct source file, and then write it + * out, possibly with a checksum update. + */ + read_block(s, offsetmap[i], buffer); + write_block(wfd, output_filename, buffer, checksum_ctx); + } + else /* use copy_file_range */ + { + /* copy_file_range modifies the offset, so use a local copy */ + off_t off = offsetmap[i]; + size_t nwritten = 0; + + /* + * Retry until we've written all the bytes (the offset is updated + * by copy_file_range, and so is the wfd file offset). + */ + do { - if (rb < 0) - pg_fatal("could not read file \"%s\": %m", s->filename); - else - pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", - s->filename, rb, BLCKSZ, - (unsigned long long) offsetmap[i]); - } - } + int wb; - /* Write out the block. */ - if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ) - { - if (wb < 0) - pg_fatal("could not write file \"%s\": %m", output_filename); - else - pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", - output_filename, wb, BLCKSZ); - } + wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0); - /* Update the checksum computation. */ - if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) - pg_fatal("could not update checksum of file \"%s\"", - output_filename); + if (wb < 0) + pg_fatal("error while copying file range from \"%s\" to \"%s\": %m", + input_filename, output_filename); + + nwritten += wb; + + } while (BLCKSZ > nwritten); + + /* + * When checksum calculation is not needed, we're done; otherwise + * read the block and pass it to the checksum calculation.
+ */ + if (checksum_ctx->type == CHECKSUM_TYPE_NONE) + continue; + + read_block(s, offsetmap[i], buffer); + + if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + pg_fatal("could not update checksum of file \"%s\"", + output_filename); + } } /* Debugging output. */ @@ -693,3 +722,52 @@ write_reconstructed_file(char *input_filename, if (wfd >= 0 && close(wfd) != 0) pg_fatal("could not close \"%s\": %m", output_filename); } + +/* + * Write the block into the file (using the file descriptor), and + * if needed update the checksum calculation. + * + * The buffer is expected to contain BLCKSZ bytes. The filename is + * provided only for the error message. + */ +static void +write_block(int fd, char *output_filename, + uint8 *buffer, pg_checksum_context *checksum_ctx) +{ + int wb; + + if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ) + { + if (wb < 0) + pg_fatal("could not write file \"%s\": %m", output_filename); + else + pg_fatal("could not write file \"%s\": wrote only %d of %d bytes", + output_filename, wb, BLCKSZ); + } + + /* Update the checksum computation. */ + if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0) + pg_fatal("could not update checksum of file \"%s\"", + output_filename); +} + +/* + * Read a block of data (BLCKSZ bytes) into the buffer. + */ +static void +read_block(rfile *s, off_t off, uint8 *buffer) +{ + int rb; + + /* Read the block from the given source file at the given offset. */ + rb = pg_pread(s->fd, buffer, BLCKSZ, off); + if (rb != BLCKSZ) + { + if (rb < 0) + pg_fatal("could not read file \"%s\": %m", s->filename); + else + pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu", + s->filename, rb, BLCKSZ, + (unsigned long long) off); + } +}