Skip to content

Commit 09e96b3

Browse files
committed
Fix handling of files that the source server removes while pg_rewind is running.
After processing the filemap to build the list of chunks that will be fetched from the source to rewind the target server, it is possible that a file which was previously processed is removed from the source. A simple example of such an occurrence is a WAL segment which gets recycled on the target in-between. When the filemap is processed, files not categorized as relation files are first truncated to prepare for their full copy, which is going to be taken from the source, divided into a set of chunks. However, for a recycled WAL segment, this would result in a segment which has a zero-byte size. With such an empty file, post-rewind recovery thinks that records are saved, but they actually are not because of the truncation which happened when processing the filemap, resulting in data loss. In order to fix the problem, make sure that files which are found to have been removed on the source when receiving their chunks are also deleted on the target server for consistency. Back-patch to 9.5 where pg_rewind was added. Author: Tsunakawa Takayuki Reviewed-by: Michael Paquier Reported-by: Tsunakawa Takayuki Discussion: https://postgr.es/m/0A3221C70F24FB45833433255569204D1F8DAAA2%40G01JPEXMBYT05
1 parent d92bc83 commit 09e96b3

File tree

3 files changed

+20
-7
lines changed

3 files changed

+20
-7
lines changed

src/bin/pg_rewind/file_ops.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
static int dstfd = -1;
3030
static char dstpath[MAXPGPATH] = "";
3131

32-
static void remove_target_file(const char *path);
3332
static void create_target_dir(const char *path);
3433
static void remove_target_dir(const char *path);
3534
static void create_target_symlink(const char *path, const char *link);
@@ -134,7 +133,7 @@ remove_target(file_entry_t *entry)
134133
break;
135134

136135
case FILE_TYPE_REGULAR:
137-
remove_target_file(entry->path);
136+
remove_target_file(entry->path, false);
138137
break;
139138

140139
case FILE_TYPE_SYMLINK:
@@ -165,8 +164,12 @@ create_target(file_entry_t *entry)
165164
}
166165
}
167166

168-
static void
169-
remove_target_file(const char *path)
167+
/*
168+
* Remove a file from target data directory. If missing_ok is true, it
169+
* is fine for the target file to not exist.
170+
*/
171+
void
172+
remove_target_file(const char *path, bool missing_ok)
170173
{
171174
char dstpath[MAXPGPATH];
172175

@@ -175,8 +178,13 @@ remove_target_file(const char *path)
175178

176179
snprintf(dstpath, sizeof(dstpath), "%s/%s", datadir_target, path);
177180
if (unlink(dstpath) != 0)
181+
{
182+
if (errno == ENOENT && missing_ok)
183+
return;
184+
178185
pg_fatal("could not remove file \"%s\": %s\n",
179186
dstpath, strerror(errno));
187+
}
180188
}
181189

182190
void

src/bin/pg_rewind/file_ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
extern void open_target_file(const char *path, bool trunc);
1616
extern void write_target_range(char *buf, off_t begin, size_t size);
1717
extern void close_target_file(void);
18+
extern void remove_target_file(const char *path, bool missing_ok);
1819
extern void truncate_target_file(const char *path, off_t newsize);
1920
extern void create_target(file_entry_t *t);
2021
extern void remove_target(file_entry_t *t);

src/bin/pg_rewind/libpq_fetch.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,15 +311,19 @@ receiveFileChunks(const char *sql)
311311
chunk = PQgetvalue(res, 0, 2);
312312

313313
/*
314-
* It's possible that the file was deleted on remote side after we
315-
* created the file map. In this case simply ignore it, as if it was
316-
* not there in the first place, and move on.
314+
* If a file has been deleted on the source, remove it on the target
315+
* as well. Note that multiple unlink() calls may happen on the same
316+
* file if multiple data chunks are associated with it, hence ignore
317+
* unconditionally anything missing. If this file is not a relation
318+
* data file, then it has been already truncated when creating the
319+
* file chunk list at the previous execution of the filemap.
317320
*/
318321
if (PQgetisnull(res, 0, 2))
319322
{
320323
pg_log(PG_DEBUG,
321324
"received null value for chunk for file \"%s\", file has been deleted\n",
322325
filename);
326+
remove_target_file(filename, true);
323327
pg_free(filename);
324328
PQclear(res);
325329
continue;

0 commit comments

Comments
 (0)