Skip to content

Commit 7e1cf76

Browse files
committed
Keep timeline history files restored from archive in pg_xlog.
The cascading standby patch in 9.2 changed the way WAL files are treated when restored from the archive. Before, they were restored under a temporary filename and not kept in pg_xlog; since that patch, they are copied into pg_xlog. This is necessary for a cascading standby to find them, but it also means that if the archive goes offline and a standby is restarted, it can recover back to where it was using the files in pg_xlog. It also means that if you take an offline backup from a standby server, it includes all the required WAL files in pg_xlog. However, the same change was not made to timeline history files, so if the WAL segment containing the checkpoint record contains a timeline switch, you will still get an error if you try to restart recovery without the archive, or recover from an offline backup taken from the standby. With this patch, timeline history files restored from the archive are copied into pg_xlog like WAL files are, so that pg_xlog contains all the files required to recover. This is a pre-existing corner-case issue in 9.2, but it is even more important in master, where it's possible for a standby to follow a timeline switch through streaming replication. To make that possible, the timeline history files must be present in pg_xlog.
1 parent b573fc8 commit 7e1cf76

File tree

1 file changed

+90
-65
lines changed
  • src/backend/access/transam

1 file changed

+90
-65
lines changed

src/backend/access/transam/xlog.c

Lines changed: 90 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
648648
bool randAccess);
649649
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
650650
static void XLogFileClose(void);
651+
static void KeepFileRestoredFromArchive(char *path, char *xlogfname);
651652
static bool RestoreArchivedFile(char *path, const char *xlogfname,
652653
const char *recovername, off_t expectedSize);
653654
static void ExecuteRecoveryCommand(char *command, char *commandName,
@@ -2842,74 +2843,12 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli,
28422843
*/
28432844
if (source == XLOG_FROM_ARCHIVE)
28442845
{
2845-
char xlogfpath[MAXPGPATH];
2846-
bool reload = false;
2847-
struct stat statbuf;
2848-
2849-
XLogFilePath(xlogfpath, tli, log, seg);
2850-
if (stat(xlogfpath, &statbuf) == 0)
2851-
{
2852-
char oldpath[MAXPGPATH];
2853-
#ifdef WIN32
2854-
static unsigned int deletedcounter = 1;
2855-
/*
2856-
* On Windows, if another process (e.g a walsender process) holds
2857-
* the file open in FILE_SHARE_DELETE mode, unlink will succeed,
2858-
* but the file will still show up in directory listing until the
2859-
* last handle is closed, and we cannot rename the new file in its
2860-
* place until that. To avoid that problem, rename the old file to
2861-
* a temporary name first. Use a counter to create a unique
2862-
* filename, because the same file might be restored from the
2863-
* archive multiple times, and a walsender could still be holding
2864-
* onto an old deleted version of it.
2865-
*/
2866-
snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
2867-
xlogfpath, deletedcounter++);
2868-
if (rename(xlogfpath, oldpath) != 0)
2869-
{
2870-
ereport(ERROR,
2871-
(errcode_for_file_access(),
2872-
errmsg("could not rename file \"%s\" to \"%s\": %m",
2873-
xlogfpath, oldpath)));
2874-
}
2875-
#else
2876-
strncpy(oldpath, xlogfpath, MAXPGPATH);
2877-
#endif
2878-
if (unlink(oldpath) != 0)
2879-
ereport(FATAL,
2880-
(errcode_for_file_access(),
2881-
errmsg("could not remove file \"%s\": %m",
2882-
xlogfpath)));
2883-
reload = true;
2884-
}
2885-
2886-
if (rename(path, xlogfpath) < 0)
2887-
ereport(ERROR,
2888-
(errcode_for_file_access(),
2889-
errmsg("could not rename file \"%s\" to \"%s\": %m",
2890-
path, xlogfpath)));
2846+
KeepFileRestoredFromArchive(path, xlogfname);
28912847

28922848
/*
28932849
* Set path to point at the new file in pg_xlog.
28942850
*/
2895-
strncpy(path, xlogfpath, MAXPGPATH);
2896-
2897-
/*
2898-
* Create .done file forcibly to prevent the restored segment from
2899-
* being archived again later.
2900-
*/
2901-
XLogArchiveForceDone(xlogfname);
2902-
2903-
/*
2904-
* If the existing segment was replaced, since walsenders might have
2905-
* it open, request them to reload a currently-open segment.
2906-
*/
2907-
if (reload)
2908-
WalSndRqstFileReload();
2909-
2910-
/* Signal walsender that new WAL has arrived */
2911-
if (AllowCascadeReplication())
2912-
WalSndWakeup();
2851+
snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
29132852
}
29142853

29152854
fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
@@ -3024,6 +2963,83 @@ XLogFileClose(void)
30242963
openLogFile = -1;
30252964
}
30262965

2966+
/*
2967+
* A file was restored from the archive under a temporary filename (path),
2968+
* and now we want to keep it. Rename it under the permanent filename in
2969+
* pg_xlog (xlogfname), replacing any existing file with the same name.
2970+
*/
2971+
static void
2972+
KeepFileRestoredFromArchive(char *path, char *xlogfname)
2973+
{
2974+
char xlogfpath[MAXPGPATH];
2975+
bool reload = false;
2976+
struct stat statbuf;
2977+
2978+
snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
2979+
2980+
if (stat(xlogfpath, &statbuf) == 0)
2981+
{
2982+
char oldpath[MAXPGPATH];
2983+
#ifdef WIN32
2984+
static unsigned int deletedcounter = 1;
2985+
/*
2986+
* On Windows, if another process (e.g. a walsender process) holds
2987+
* the file open in FILE_SHARE_DELETE mode, unlink will succeed,
2988+
* but the file will still show up in directory listing until the
2989+
* last handle is closed, and we cannot rename the new file in its
2990+
* place until that. To avoid that problem, rename the old file to
2991+
* a temporary name first. Use a counter to create a unique
2992+
* filename, because the same file might be restored from the
2993+
* archive multiple times, and a walsender could still be holding
2994+
* onto an old deleted version of it.
2995+
*/
2996+
snprintf(oldpath, MAXPGPATH, "%s.deleted%u",
2997+
xlogfpath, deletedcounter++);
2998+
if (rename(xlogfpath, oldpath) != 0)
2999+
{
3000+
ereport(ERROR,
3001+
(errcode_for_file_access(),
3002+
errmsg("could not rename file \"%s\" to \"%s\": %m",
3003+
xlogfpath, oldpath)));
3004+
}
3005+
#else
3006+
strncpy(oldpath, xlogfpath, MAXPGPATH);
3007+
#endif
3008+
if (unlink(oldpath) != 0)
3009+
ereport(FATAL,
3010+
(errcode_for_file_access(),
3011+
errmsg("could not remove file \"%s\": %m",
3012+
xlogfpath)));
3013+
reload = true;
3014+
}
3015+
3016+
if (rename(path, xlogfpath) < 0)
3017+
ereport(ERROR,
3018+
(errcode_for_file_access(),
3019+
errmsg("could not rename file \"%s\" to \"%s\": %m",
3020+
path, xlogfpath)));
3021+
3022+
/*
3023+
* Create .done file forcibly to prevent the restored segment from
3024+
* being archived again later.
3025+
*/
3026+
XLogArchiveForceDone(xlogfname);
3027+
3028+
/*
3029+
* If the existing file was replaced, since walsenders might have it
3030+
* open, request them to reload a currently-open segment. This is only
3031+
* required for WAL segments; walsenders don't hold other files open,
3032+
* but there's no harm in doing this too often, and we don't know what
3033+
* kind of a file we're dealing with here.
3034+
*/
3035+
if (reload)
3036+
WalSndRqstFileReload();
3037+
3038+
/* Signal walsender that new WAL has arrived */
3039+
if (AllowCascadeReplication())
3040+
WalSndWakeup();
3041+
}
3042+
30273043
/*
30283044
* Attempt to retrieve the specified file from off-line archival storage.
30293045
* If successful, fill "path" with its complete path (note that this will be
@@ -4356,6 +4372,7 @@ readTimeLineHistory(TimeLineID targetTLI)
43564372
char histfname[MAXFNAMELEN];
43574373
char fline[MAXPGPATH];
43584374
FILE *fd;
4375+
bool fromArchive = false;
43594376

43604377
/* Timeline 1 does not have a history file, so no need to check */
43614378
if (targetTLI == 1)
@@ -4364,7 +4381,8 @@ readTimeLineHistory(TimeLineID targetTLI)
43644381
if (InArchiveRecovery)
43654382
{
43664383
TLHistoryFileName(histfname, targetTLI);
4367-
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
4384+
fromArchive =
4385+
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
43684386
}
43694387
else
43704388
TLHistoryFilePath(path, targetTLI);
@@ -4433,6 +4451,13 @@ readTimeLineHistory(TimeLineID targetTLI)
44334451
(errmsg_internal("history of timeline %u is %s",
44344452
targetTLI, nodeToString(result))));
44354453

4454+
/*
4455+
* If the history file was fetched from archive, save it in pg_xlog for
4456+
* future reference.
4457+
*/
4458+
if (fromArchive)
4459+
KeepFileRestoredFromArchive(path, histfname);
4460+
44364461
return result;
44374462
}
44384463

0 commit comments

Comments
 (0)