Skip to content

Commit 60df192

Browse files
committed
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated when restored from the archive. Before, they were restored under a temporary filename, and not kept in pg_xlog, but after the patch, they were copied under pg_xlog. This is necessary for a cascading standby to find them, but it also means that if the archive goes offline and a standby is restarted, it can recover back to where it was using the files in pg_xlog. It also means that if you take an offline backup from a standby server, it includes all the required WAL files in pg_xlog. However, the same change was not made to timeline history files, so if the WAL segment containing the checkpoint record contains a timeline switch, you will still get an error if you try to restart recovery without the archive, or recover from an offline backup taken from the standby. With this patch, timeline history files restored from archive are copied into pg_xlog like WAL files are, so that pg_xlog contains all the files required to recover. This is a corner-case pre-existing issue in 9.2, but even more important in master where it's possible for a standby to follow a timeline switch through streaming replication. To make that possible, the timeline history files must be present in pg_xlog.
1 parent 103cc89 commit 60df192

File tree

4 files changed

+88
-59
lines changed

4 files changed

+88
-59
lines changed

src/backend/access/transam/timeline.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ readTimeLineHistory(TimeLineID targetTLI)
5959
TimeLineHistoryEntry *entry;
6060
TimeLineID lasttli = 0;
6161
XLogRecPtr prevend;
62+
bool fromArchive = false;
6263

6364
/* Timeline 1 does not have a history file, so no need to check */
6465
if (targetTLI == 1)
@@ -72,7 +73,8 @@ readTimeLineHistory(TimeLineID targetTLI)
7273
if (InArchiveRecovery)
7374
{
7475
TLHistoryFileName(histfname, targetTLI);
75-
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
76+
fromArchive =
77+
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0, false);
7678
}
7779
else
7880
TLHistoryFilePath(path, targetTLI);
@@ -165,6 +167,13 @@ readTimeLineHistory(TimeLineID targetTLI)
165167

166168
result = lcons(entry, result);
167169

170+
/*
171+
* If the history file was fetched from archive, save it in pg_xlog for
172+
* future reference.
173+
*/
174+
if (fromArchive)
175+
KeepFileRestoredFromArchive(path, histfname);
176+
168177
return result;
169178
}
170179

src/backend/access/transam/xlog.c

+2-58
Original file line numberDiff line numberDiff line change
@@ -2626,68 +2626,12 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
26262626
*/
26272627
if (source == XLOG_FROM_ARCHIVE)
26282628
{
2629-
char xlogfpath[MAXPGPATH];
2630-
bool reload = false;
2631-
struct stat statbuf;
2632-
2633-
XLogFilePath(xlogfpath, tli, segno);
2634-
if (stat(xlogfpath, &statbuf) == 0)
2635-
{
2636-
char oldpath[MAXPGPATH];
2637-
#ifdef WIN32
2638-
static unsigned int deletedcounter = 1;
2639-
/*
2640-
* On Windows, if another process (e.g. a walsender process) holds
2641-
* the file open in FILE_SHARE_DELETE mode, unlink will succeed,
2642-
* but the file will still show up in directory listing until the
2643-
* last handle is closed, and we cannot rename the new file in its
2644-
* place until that. To avoid that problem, rename the old file to
2645-
* a temporary name first. Use a counter to create a unique
2646-
* filename, because the same file might be restored from the
2647-
* archive multiple times, and a walsender could still be holding
2648-
* onto an old deleted version of it.
2649-
*/
2650-
snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
2651-
xlogfpath, deletedcounter++);
2652-
if (rename(xlogfpath, oldpath) != 0)
2653-
{
2654-
ereport(ERROR,
2655-
(errcode_for_file_access(),
2656-
errmsg("could not rename file \"%s\" to \"%s\": %m",
2657-
xlogfpath, oldpath)));
2658-
}
2659-
#else
2660-
strncpy(oldpath, xlogfpath, MAXPGPATH);
2661-
#endif
2662-
if (unlink(oldpath) != 0)
2663-
ereport(FATAL,
2664-
(errcode_for_file_access(),
2665-
errmsg("could not remove file \"%s\": %m",
2666-
xlogfpath)));
2667-
reload = true;
2668-
}
2669-
2670-
if (rename(path, xlogfpath) < 0)
2671-
ereport(ERROR,
2672-
(errcode_for_file_access(),
2673-
errmsg("could not rename file \"%s\" to \"%s\": %m",
2674-
path, xlogfpath)));
2629+
KeepFileRestoredFromArchive(path, xlogfname);
26752630

26762631
/*
26772632
* Set path to point at the new file in pg_xlog.
26782633
*/
2679-
strncpy(path, xlogfpath, MAXPGPATH);
2680-
2681-
/*
2682-
* If the existing segment was replaced, since walsenders might have
2683-
* it open, request them to reload a currently-open segment.
2684-
*/
2685-
if (reload)
2686-
WalSndRqstFileReload();
2687-
2688-
/* Signal walsender that new WAL has arrived */
2689-
if (AllowCascadeReplication())
2690-
WalSndWakeup();
2634+
snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
26912635
}
26922636

26932637
fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);

src/backend/access/transam/xlogarchive.c

+75
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "access/xlog_internal.h"
2525
#include "miscadmin.h"
2626
#include "postmaster/startup.h"
27+
#include "replication/walsender.h"
2728
#include "storage/fd.h"
2829
#include "storage/ipc.h"
2930
#include "storage/lwlock.h"
@@ -416,6 +417,80 @@ ExecuteRecoveryCommand(char *command, char *commandName, bool failOnSignal)
416417
}
417418

418419

420+
/*
421+
* A file was restored from the archive under a temporary filename (path),
422+
* and now we want to keep it. Rename it under the permanent filename in
423+
* pg_xlog (xlogfname), replacing any existing file with the same name.
*
* path is the temporary location of the restored file; xlogfname is the bare
* file name, which is placed under XLOGDIR. If a file with that name already
* exists it is removed first, and walsenders are then asked to reload their
* currently-open segment. WalSndWakeup() is called unconditionally at the
* end. Failures to rename or unlink are reported through ereport().
424+
*/
425+
void
426+
KeepFileRestoredFromArchive(char *path, char *xlogfname)
427+
{
428+
char xlogfpath[MAXPGPATH];
429+
bool reload = false;
430+
struct stat statbuf;
431+
432+
snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
433+
434+
if (stat(xlogfpath, &statbuf) == 0)
435+
{
436+
char oldpath[MAXPGPATH];
437+
#ifdef WIN32
438+
static unsigned int deletedcounter = 1;
439+
/*
440+
* On Windows, if another process (e.g. a walsender process) holds
441+
* the file open in FILE_SHARE_DELETE mode, unlink will succeed,
442+
* but the file will still show up in directory listing until the
443+
* last handle is closed, and we cannot rename the new file in its
444+
* place until that. To avoid that problem, rename the old file to
445+
* a temporary name first. Use a counter to create a unique
446+
* filename, because the same file might be restored from the
447+
* archive multiple times, and a walsender could still be holding
448+
* onto an old deleted version of it.
449+
*/
450+
snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
451+
xlogfpath, deletedcounter++);
452+
if (rename(xlogfpath, oldpath) != 0)
453+
{
454+
ereport(ERROR,
455+
(errcode_for_file_access(),
456+
errmsg("could not rename file \"%s\" to \"%s\": %m",
457+
xlogfpath, oldpath)));
458+
}
459+
#else
460+
strncpy(oldpath, xlogfpath, MAXPGPATH);
461+
#endif
/*
* NOTE(review): the message below names xlogfpath, but the file being
* unlinked is oldpath; under WIN32 those are different names.
*/
462+
if (unlink(oldpath) != 0)
463+
ereport(FATAL,
464+
(errcode_for_file_access(),
465+
errmsg("could not remove file \"%s\": %m",
466+
xlogfpath)));
467+
reload = true;
468+
}
469+
470+
if (rename(path, xlogfpath) < 0)
471+
ereport(ERROR,
472+
(errcode_for_file_access(),
473+
errmsg("could not rename file \"%s\" to \"%s\": %m",
474+
path, xlogfpath)));
475+
476+
/*
477+
* If the existing file was replaced, since walsenders might have it
478+
* open, request them to reload a currently-open segment. This is only
479+
* required for WAL segments, walsenders don't hold other files open, but
480+
* there's no harm in doing this too often, and we don't know what kind
481+
* of a file we're dealing with here.
482+
*/
483+
if (reload)
484+
WalSndRqstFileReload();
485+
486+
/*
487+
* Signal walsender that new WAL has arrived. Again, this isn't necessary
488+
* if we restored something other than a WAL segment, but it does no harm
489+
* either.
490+
*/
491+
WalSndWakeup();
492+
}
493+
419494
/*
420495
* XLogArchiveNotify
421496
*

src/include/access/xlog_internal.h

+1
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ extern bool RestoreArchivedFile(char *path, const char *xlogfname,
265265
bool cleanupEnabled);
266266
extern void ExecuteRecoveryCommand(char *command, char *commandName,
267267
bool failOnerror);
268+
extern void KeepFileRestoredFromArchive(char *path, char *xlogfname);
268269
extern void XLogArchiveNotify(const char *xlog);
269270
extern void XLogArchiveNotifySeg(XLogSegNo segno);
270271
extern bool XLogArchiveCheckDone(const char *xlog);

0 commit comments

Comments
 (0)