Skip to content

Commit f8ce4ed

Browse files
committed
Allow copying files using clone/copy_file_range
Adds --clone/--copy-file-range options to pg_combinebackup, to allow copying files using file cloning or copy_file_range(). These methods may be faster than the standard block-by-block copy, but the main advantage is that they enable various features provided by CoW filesystems. This commit only uses these copy methods for files that did not change and can be copied as a whole from a single backup. These new copy methods may not be available on all platforms, in which case the command throws an error (immediately, even if no files would be copied as a whole). This early failure seems better than failing later when trying to copy the first file, after performing a lot of work on earlier files. If the requested copy method is available, but a checksum needs to be recalculated (e.g. because of a different checksum type), the file is still copied using the requested method, but it is also read for the checksum calculation. Depending on the filesystem this may be more expensive than just performing the simple copy, but it does enable the CoW benefits. Initial patch by Jakub Wartak, various reworks and improvements by me. Author: Tomas Vondra, Jakub Wartak Reviewed-by: Thomas Munro, Jakub Wartak, Robert Haas Discussion: https://postgr.es/m/3024283a-7491-4240-80d0-421575f6bb23%40enterprisedb.com
1 parent 3c5ff36 commit f8ce4ed

File tree

7 files changed

+278
-41
lines changed

7 files changed

+278
-41
lines changed

doc/src/sgml/ref/pg_combinebackup.sgml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,51 @@ PostgreSQL documentation
185185
</listitem>
186186
</varlistentry>
187187

188+
<varlistentry>
189+
<term><option>--clone</option></term>
190+
<listitem>
191+
<para>
192+
Use efficient file cloning (also known as <quote>reflinks</quote> on
193+
some systems) instead of copying files to the new data directory,
194+
which can result in near-instantaneous copying of the data files.
195+
</para>
196+
197+
<para>
198+
If a backup manifest is not available or does not contain a checksum of
199+
the right type, file cloning will be used to copy the file, but the
200+
file will also be read block-by-block for the checksum calculation.
201+
</para>
202+
203+
<para>
204+
File cloning is only supported on some operating systems and file
205+
systems. If it is selected but not supported, the
206+
<application>pg_combinebackup</application> run will error. At present,
207+
it is supported on Linux (kernel 4.5 or later) with Btrfs and XFS (on
208+
file systems created with reflink support), and on macOS with APFS.
209+
</para>
210+
</listitem>
211+
</varlistentry>
212+
213+
<varlistentry>
214+
<term><option>--copy-file-range</option></term>
215+
<listitem>
216+
<para>
217+
Use the <function>copy_file_range</function> system call for efficient
218+
copying. On some file systems this gives results similar to
219+
<option>--clone</option>, sharing physical disk blocks, while on others
220+
it may still copy blocks, but do so via an optimized path. At present,
221+
it is supported on Linux and FreeBSD.
222+
</para>
223+
224+
<para>
225+
If a backup manifest is not available or does not contain a checksum of
226+
the right type, <function>copy_file_range</function> will be used to
227+
copy the file, but the file will also be read block-by-block for the
228+
checksum calculation.
229+
</para>
230+
</listitem>
231+
</varlistentry>
232+
188233
<varlistentry>
189234
<term><option>-V</option></term>
190235
<term><option>--version</option></term>

src/bin/pg_combinebackup/copy_file.c

Lines changed: 168 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <copyfile.h>
1515
#endif
1616
#include <fcntl.h>
17+
#include <limits.h>
1718
#include <sys/stat.h>
1819
#include <unistd.h>
1920

@@ -24,8 +25,15 @@
2425
static void copy_file_blocks(const char *src, const char *dst,
2526
pg_checksum_context *checksum_ctx);
2627

28+
static void copy_file_clone(const char *src, const char *dst,
29+
pg_checksum_context *checksum_ctx);
30+
31+
static void copy_file_by_range(const char *src, const char *dst,
32+
pg_checksum_context *checksum_ctx);
33+
2734
#ifdef WIN32
28-
static void copy_file_copyfile(const char *src, const char *dst);
35+
static void copy_file_copyfile(const char *src, const char *dst,
36+
pg_checksum_context *checksum_ctx);
2937
#endif
3038

3139
/*
@@ -35,8 +43,13 @@ static void copy_file_copyfile(const char *src, const char *dst);
3543
*/
3644
void
3745
copy_file(const char *src, const char *dst,
38-
pg_checksum_context *checksum_ctx, bool dry_run)
46+
pg_checksum_context *checksum_ctx,
47+
CopyMethod copy_method, bool dry_run)
3948
{
49+
char *strategy_name = NULL;
50+
void (*strategy_implementation) (const char *, const char *,
51+
pg_checksum_context *checksum_ctx) = NULL;
52+
4053
/*
4154
* In dry-run mode, we don't actually copy anything, nor do we read any
4255
* data from the source file, but we do verify that we can open it.
@@ -51,61 +64,94 @@ copy_file(const char *src, const char *dst,
5164
pg_fatal("could not close \"%s\": %m", src);
5265
}
5366

54-
/*
55-
* If we don't need to compute a checksum, then we can use any special
56-
* operating system primitives that we know about to copy the file; this
57-
* may be quicker than a naive block copy.
58-
*/
59-
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
60-
{
61-
char *strategy_name = NULL;
62-
void (*strategy_implementation) (const char *, const char *) = NULL;
63-
6467
#ifdef WIN32
65-
strategy_name = "CopyFile";
66-
strategy_implementation = copy_file_copyfile;
68+
copy_method = COPY_METHOD_COPYFILE;
6769
#endif
6870

69-
if (strategy_name != NULL)
70-
{
71-
if (dry_run)
72-
pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
73-
src, dst, strategy_name);
74-
else
75-
{
76-
pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
77-
src, dst, strategy_name);
78-
(*strategy_implementation) (src, dst);
79-
}
80-
return;
81-
}
71+
/* Determine the name of the copy strategy for use in log messages. */
72+
switch (copy_method)
73+
{
74+
case COPY_METHOD_CLONE:
75+
strategy_name = "clone";
76+
strategy_implementation = copy_file_clone;
77+
break;
78+
case COPY_METHOD_COPY:
79+
/* leave NULL for simple block-by-block copy */
80+
strategy_implementation = copy_file_blocks;
81+
break;
82+
case COPY_METHOD_COPY_FILE_RANGE:
83+
strategy_name = "copy_file_range";
84+
strategy_implementation = copy_file_by_range;
85+
break;
86+
#ifdef WIN32
87+
case COPY_METHOD_COPYFILE:
88+
strategy_name = "CopyFile";
89+
strategy_implementation = copy_file_copyfile;
90+
break;
91+
#endif
8292
}
8393

84-
/*
85-
* Fall back to the simple approach of reading and writing all the blocks,
86-
* feeding them into the checksum context as we go.
87-
*/
8894
if (dry_run)
8995
{
90-
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
96+
if (strategy_name)
97+
pg_log_debug("would copy \"%s\" to \"%s\" using strategy %s",
98+
src, dst, strategy_name);
99+
else
91100
pg_log_debug("would copy \"%s\" to \"%s\"",
92101
src, dst);
93-
else
94-
pg_log_debug("would copy \"%s\" to \"%s\" and checksum with %s",
95-
src, dst, pg_checksum_type_name(checksum_ctx->type));
96102
}
97103
else
98104
{
99-
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
105+
if (strategy_name)
106+
pg_log_debug("copying \"%s\" to \"%s\" using strategy %s",
107+
src, dst, strategy_name);
108+
else if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
100109
pg_log_debug("copying \"%s\" to \"%s\"",
101110
src, dst);
102111
else
103112
pg_log_debug("copying \"%s\" to \"%s\" and checksumming with %s",
104113
src, dst, pg_checksum_type_name(checksum_ctx->type));
105-
copy_file_blocks(src, dst, checksum_ctx);
114+
115+
strategy_implementation(src, dst, checksum_ctx);
106116
}
107117
}
108118

119+
/*
120+
* Calculate checksum for the src file.
121+
*/
122+
static void
123+
checksum_file(const char *src, pg_checksum_context *checksum_ctx)
124+
{
125+
int src_fd;
126+
uint8 *buffer;
127+
const int buffer_size = 50 * BLCKSZ;
128+
ssize_t rb;
129+
unsigned offset = 0;
130+
131+
/* bail out if no checksum needed */
132+
if (checksum_ctx->type == CHECKSUM_TYPE_NONE)
133+
return;
134+
135+
if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
136+
pg_fatal("could not open file \"%s\": %m", src);
137+
138+
buffer = pg_malloc(buffer_size);
139+
140+
while ((rb = read(src_fd, buffer, buffer_size)) > 0)
141+
{
142+
if (pg_checksum_update(checksum_ctx, buffer, rb) < 0)
143+
pg_fatal("could not update checksum of file \"%s\"", src);
144+
145+
offset += rb;
146+
}
147+
148+
if (rb < 0)
149+
pg_fatal("could not read file \"%s\": %m", src);
150+
151+
pg_free(buffer);
152+
close(src_fd);
153+
}
154+
109155
/*
110156
* Copy a file block by block, and optionally compute a checksum as we go.
111157
*/
@@ -156,14 +202,98 @@ copy_file_blocks(const char *src, const char *dst,
156202
close(dest_fd);
157203
}
158204

205+
/*
206+
* copy_file_clone
207+
* Clones/reflinks a file from src to dest.
208+
*
209+
* If needed, also reads the file and calculates the checksum.
210+
*/
211+
static void
212+
copy_file_clone(const char *src, const char *dest,
213+
pg_checksum_context *checksum_ctx)
214+
{
215+
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
216+
if (copyfile(src, dest, NULL, COPYFILE_CLONE_FORCE) < 0)
217+
pg_fatal("error while cloning file \"%s\" to \"%s\": %m", src, dest);
218+
#elif defined(__linux__) && defined(FICLONE)
219+
{
220+
if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
221+
pg_fatal("could not open file \"%s\": %m", src);
222+
223+
if ((dest_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
224+
pg_file_create_mode)) < 0)
225+
pg_fatal("could not create file \"%s\": %m", dest);
226+
227+
if (ioctl(dest_fd, FICLONE, src_fd) < 0)
228+
{
229+
int save_errno = errno;
230+
231+
unlink(dest);
232+
233+
pg_fatal("error while cloning file \"%s\" to \"%s\": %s",
234+
src, dest);
235+
}
236+
}
237+
#else
238+
pg_fatal("file cloning not supported on this platform");
239+
#endif
240+
241+
/* if needed, calculate checksum of the file */
242+
checksum_file(src, checksum_ctx);
243+
}
244+
245+
/*
246+
* copy_file_by_range
247+
* Copies a file from src to dest using copy_file_range system call.
248+
*
249+
* If needed, also reads the file and calculates the checksum.
250+
*/
251+
static void
252+
copy_file_by_range(const char *src, const char *dest,
253+
pg_checksum_context *checksum_ctx)
254+
{
255+
#if defined(HAVE_COPY_FILE_RANGE)
256+
int src_fd;
257+
int dest_fd;
258+
ssize_t nbytes;
259+
260+
if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
261+
pg_fatal("could not open file \"%s\": %m", src);
262+
263+
if ((dest_fd = open(dest, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
264+
pg_file_create_mode)) < 0)
265+
pg_fatal("could not create file \"%s\": %m", dest);
266+
267+
do
268+
{
269+
nbytes = copy_file_range(src_fd, NULL, dest_fd, NULL, SSIZE_MAX, 0);
270+
if (nbytes < 0)
271+
pg_fatal("error while copying file range from \"%s\" to \"%s\": %m",
272+
src, dest);
273+
} while (nbytes > 0);
274+
275+
close(src_fd);
276+
close(dest_fd);
277+
#else
278+
pg_fatal("copy_file_range not supported on this platform");
279+
#endif
280+
281+
/* if needed, calculate checksum of the file */
282+
checksum_file(src, checksum_ctx);
283+
}
284+
159285
#ifdef WIN32
160286
static void
161-
copy_file_copyfile(const char *src, const char *dst)
287+
copy_file_copyfile(const char *src, const char *dst,
288+
pg_checksum_context *checksum_ctx)
162289
{
163290
if (CopyFile(src, dst, true) == 0)
164291
{
165292
_dosmaperr(GetLastError());
166293
pg_fatal("could not copy \"%s\" to \"%s\": %m", src, dst);
167294
}
295+
296+
/* if needed, calculate checksum of the file */
297+
checksum_file(src, checksum_ctx);
168298
}
169299
#endif /* WIN32 */

src/bin/pg_combinebackup/copy_file.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,25 @@
1111
#ifndef COPY_FILE_H
1212
#define COPY_FILE_H
1313

14+
#include "c.h"
1415
#include "common/checksum_helper.h"
16+
#include "common/file_utils.h"
17+
18+
/*
19+
* Enumeration to denote copy modes.
20+
*/
21+
typedef enum CopyMethod
22+
{
23+
COPY_METHOD_CLONE,
24+
COPY_METHOD_COPY,
25+
COPY_METHOD_COPY_FILE_RANGE,
26+
#ifdef WIN32
27+
COPY_METHOD_COPYFILE,
28+
#endif
29+
} CopyMethod;
1530

1631
extern void copy_file(const char *src, const char *dst,
17-
pg_checksum_context *checksum_ctx, bool dry_run);
32+
pg_checksum_context *checksum_ctx,
33+
CopyMethod copy_method, bool dry_run);
1834

1935
#endif /* COPY_FILE_H */

0 commit comments

Comments
 (0)