Skip to content

Commit ac81101

Browse files
committed
Allow using copy_file_range in write_reconstructed_file
This commit allows using copy_file_range() for efficient combining of data from multiple files, instead of simply reading/writing the blocks. Depending on the filesystem and other factors (size of the increment, distribution of modified blocks, etc.) this may be faster than the block-by-block copy, but more importantly it enables various features provided by CoW filesystems.

If a checksum needs to be calculated for the file, the same strategy as when copying whole files is used: copy_file_range() is used to copy the blocks, but the file is also read for the checksum calculation.

While the checksum calculation is rarely needed when cloning whole files, it is almost always needed when reconstructing files from multiple backups (the only exception is when the user specified --no-manifest).

Author: Tomas Vondra
Reviewed-by: Thomas Munro, Jakub Wartak, Robert Haas
Discussion: https://postgr.es/m/3024283a-7491-4240-80d0-421575f6bb23%40enterprisedb.com
1 parent b8b37e4 commit ac81101

File tree

1 file changed

+106
-28
lines changed

1 file changed

+106
-28
lines changed

src/bin/pg_combinebackup/reconstruct.c

Lines changed: 106 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,14 @@ static void write_reconstructed_file(char *input_filename,
5858
rfile **sourcemap,
5959
off_t *offsetmap,
6060
pg_checksum_context *checksum_ctx,
61+
CopyMethod copy_method,
6162
bool debug,
6263
bool dry_run);
6364
static void read_bytes(rfile *rf, void *buffer, unsigned length);
65+
static void write_block(int wfd, char *output_filename,
66+
uint8 *buffer,
67+
pg_checksum_context *checksum_ctx);
68+
static void read_block(rfile *s, off_t off, uint8 *buffer);
6469

6570
/*
6671
* Reconstruct a full file from an incremental file and a chain of prior
@@ -325,7 +330,8 @@ reconstruct_from_incremental_file(char *input_filename,
325330
{
326331
write_reconstructed_file(input_filename, output_filename,
327332
block_length, sourcemap, offsetmap,
328-
&checksum_ctx, debug, dry_run);
333+
&checksum_ctx, copy_method,
334+
debug, dry_run);
329335
debug_reconstruction(n_prior_backups + 1, source, dry_run);
330336
}
331337

@@ -535,6 +541,7 @@ write_reconstructed_file(char *input_filename,
535541
rfile **sourcemap,
536542
off_t *offsetmap,
537543
pg_checksum_context *checksum_ctx,
544+
CopyMethod copy_method,
538545
bool debug,
539546
bool dry_run)
540547
{
@@ -622,7 +629,6 @@ write_reconstructed_file(char *input_filename,
622629
{
623630
uint8 buffer[BLCKSZ];
624631
rfile *s = sourcemap[i];
625-
int wb;
626632

627633
/* Update accounting information. */
628634
if (s == NULL)
@@ -646,38 +652,61 @@ write_reconstructed_file(char *input_filename,
646652
* uninitialized block, so just zero-fill it.
647653
*/
648654
memset(buffer, 0, BLCKSZ);
649-
}
650-
else
651-
{
652-
int rb;
653655

654-
/* Read the block from the correct source, except if dry-run. */
655-
rb = pg_pread(s->fd, buffer, BLCKSZ, offsetmap[i]);
656-
if (rb != BLCKSZ)
657-
{
658-
if (rb < 0)
659-
pg_fatal("could not read file \"%s\": %m", s->filename);
660-
else
661-
pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu",
662-
s->filename, rb, BLCKSZ,
663-
(unsigned long long) offsetmap[i]);
664-
}
656+
/* Write out the block, update the checksum if needed. */
657+
write_block(wfd, output_filename, buffer, checksum_ctx);
658+
659+
/* Nothing else to do for zero-filled blocks. */
660+
continue;
665661
}
666662

667-
/* Write out the block. */
668-
if ((wb = write(wfd, buffer, BLCKSZ)) != BLCKSZ)
663+
/* Copy the block using the appropriate copy method. */
664+
if (copy_method != COPY_METHOD_COPY_FILE_RANGE)
669665
{
670-
if (wb < 0)
671-
pg_fatal("could not write file \"%s\": %m", output_filename);
672-
else
673-
pg_fatal("could not write file \"%s\": wrote only %d of %d bytes",
674-
output_filename, wb, BLCKSZ);
666+
/*
667+
* Read the block from the correct source file, and then write it
668+
* out, possibly with a checksum update.
669+
*/
670+
read_block(s, offsetmap[i], buffer);
671+
write_block(wfd, output_filename, buffer, checksum_ctx);
675672
}
673+
else /* use copy_file_range */
674+
{
675+
/* copy_file_range modifies the offset, so use a local copy */
676+
off_t off = offsetmap[i];
677+
size_t nwritten = 0;
678+
679+
/*
680+
* Retry until we've written all the bytes (the offset is updated
681+
* by copy_file_range, and so is the wfd file offset).
682+
*/
683+
do
684+
{
685+
int wb;
686+
687+
wb = copy_file_range(s->fd, &off, wfd, NULL, BLCKSZ - nwritten, 0);
688+
689+
if (wb < 0)
690+
pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
691+
input_filename, output_filename);
692+
693+
nwritten += wb;
694+
695+
} while (BLCKSZ > nwritten);
696+
697+
/*
698+
* When checksum calculation is not needed, we're done; otherwise
699+
* read the block and pass it to the checksum calculation.
700+
*/
701+
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
702+
continue;
703+
704+
read_block(s, offsetmap[i], buffer);
676705

677-
/* Update the checksum computation. */
678-
if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
679-
pg_fatal("could not update checksum of file \"%s\"",
680-
output_filename);
706+
if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
707+
pg_fatal("could not update checksum of file \"%s\"",
708+
output_filename);
709+
}
681710
}
682711

683712
/* Debugging output. */
@@ -693,3 +722,52 @@ write_reconstructed_file(char *input_filename,
693722
if (wfd >= 0 && close(wfd) != 0)
694723
pg_fatal("could not close \"%s\": %m", output_filename);
695724
}
725+
726+
/*
727+
* Write the block into the file (using the file descriptor), and
728+
* if needed update the checksum calculation.
729+
*
730+
* The buffer is expected to contain BLCKSZ bytes. The filename is
731+
* provided only for the error message.
732+
*/
733+
static void
734+
write_block(int fd, char *output_filename,
735+
uint8 *buffer, pg_checksum_context *checksum_ctx)
736+
{
737+
int wb;
738+
739+
if ((wb = write(fd, buffer, BLCKSZ)) != BLCKSZ)
740+
{
741+
if (wb < 0)
742+
pg_fatal("could not write file \"%s\": %m", output_filename);
743+
else
744+
pg_fatal("could not write file \"%s\": wrote only %d of %d bytes",
745+
output_filename, wb, BLCKSZ);
746+
}
747+
748+
/* Update the checksum computation. */
749+
if (pg_checksum_update(checksum_ctx, buffer, BLCKSZ) < 0)
750+
pg_fatal("could not update checksum of file \"%s\"",
751+
output_filename);
752+
}
753+
754+
/*
755+
* Read a block of data (BLCKSZ bytes) into the the buffer.
756+
*/
757+
static void
758+
read_block(rfile *s, off_t off, uint8 *buffer)
759+
{
760+
int rb;
761+
762+
/* Read the block from the correct source, except if dry-run. */
763+
rb = pg_pread(s->fd, buffer, BLCKSZ, off);
764+
if (rb != BLCKSZ)
765+
{
766+
if (rb < 0)
767+
pg_fatal("could not read file \"%s\": %m", s->filename);
768+
else
769+
pg_fatal("could not read file \"%s\": read only %d of %d bytes at offset %llu",
770+
s->filename, rb, BLCKSZ,
771+
(unsigned long long) off);
772+
}
773+
}

0 commit comments

Comments
 (0)