Skip to content

Commit 5390887

Browse files
committed
fix WAL validation for stream backups
1 parent a00f9f1 commit 5390887

File tree

1 file changed

+142
-47
lines changed

1 file changed

+142
-47
lines changed

parsexlog.c

Lines changed: 142 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,98 @@ extractPageMap(const char *archivedir, XLogRecPtr startpoint, TimeLineID tli,
166166
}
167167
}
168168

169-
/* TODO Add comment, review */
169+
/*
170+
* Ensure that the backup has all wal files needed for recovery to consistent state.
171+
*/
172+
static void
173+
validate_backup_wal_from_start_to_stop(pgBackup *backup,
174+
char *backup_xlog_path,
175+
TimeLineID tli)
176+
{
177+
XLogRecPtr startpoint = backup->start_lsn;
178+
XLogRecord *record;
179+
XLogReaderState *xlogreader;
180+
char *errormsg;
181+
XLogPageReadPrivate private;
182+
bool got_endpoint = false;
183+
184+
private.archivedir = backup_xlog_path;
185+
private.tli = tli;
186+
187+
/* We will check it in the end */
188+
xlogfpath[0] = '\0';
189+
190+
xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
191+
if (xlogreader == NULL)
192+
elog(ERROR, "out of memory");
193+
194+
while (true)
195+
{
196+
record = XLogReadRecord(xlogreader, startpoint, &errormsg);
197+
198+
if (record == NULL)
199+
{
200+
if (errormsg)
201+
elog(WARNING, "%s", errormsg);
202+
203+
break;
204+
}
205+
206+
/* Got WAL record at stop_lsn */
207+
if (xlogreader->ReadRecPtr == backup->stop_lsn)
208+
{
209+
got_endpoint = true;
210+
break;
211+
}
212+
startpoint = InvalidXLogRecPtr; /* continue reading at next record */
213+
}
214+
215+
if (!got_endpoint)
216+
{
217+
if (xlogfpath[0] != 0)
218+
{
219+
/* XLOG reader couldn't read WAL segment.
220+
* We throw a WARNING here to be able to update backup status below.
221+
*/
222+
if (!xlogexists)
223+
{
224+
elog(WARNING, "WAL segment \"%s\" is absent", xlogfpath);
225+
}
226+
else if (xlogreadfd != -1)
227+
{
228+
elog(WARNING, "Possible WAL CORRUPTION."
229+
"Error has occured during reading WAL segment \"%s\"", xlogfpath);
230+
}
231+
}
232+
233+
/*
234+
* If we don't have WAL between start_lsn and stop_lsn,
235+
* the backup is definitely corrupted. Update its status.
236+
*/
237+
backup->status = BACKUP_STATUS_CORRUPT;
238+
pgBackupWriteBackupControlFile(backup);
239+
elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X",
240+
(uint32) (backup->start_lsn >> 32),
241+
(uint32) (backup->start_lsn),
242+
(uint32) (backup->stop_lsn >> 32),
243+
(uint32) (backup->stop_lsn));
244+
}
245+
246+
/* clean */
247+
XLogReaderFree(xlogreader);
248+
if (xlogreadfd != -1)
249+
{
250+
close(xlogreadfd);
251+
xlogreadfd = -1;
252+
xlogexists = false;
253+
}
254+
}
255+
256+
/*
257+
* Ensure that the backup has all WAL files needed for recovery to a consistent
258+
* state, and check that the archive has all files needed to restore the backup
259+
* up to the given recovery target.
260+
*/
170261
void
171262
validate_wal(pgBackup *backup,
172263
const char *archivedir,
@@ -183,9 +274,33 @@ validate_wal(pgBackup *backup,
183274
TimestampTz last_time = 0;
184275
char last_timestamp[100],
185276
target_timestamp[100];
186-
bool all_wal = false,
187-
got_endpoint = false;
277+
bool all_wal = false;
278+
char backup_xlog_path[MAXPGPATH];
279+
280+
/*
281+
* Check that the backup has all wal files needed
282+
* for recovery to consistent state.
283+
*/
284+
if (backup->stream)
285+
{
286+
sprintf(backup_xlog_path, "%s/%s/%s/%s/%s",
287+
backup_path, BACKUPS_DIR, base36enc(backup->start_time), DATABASE_DIR, PG_XLOG_DIR);
288+
289+
validate_backup_wal_from_start_to_stop(backup, backup_xlog_path, tli);
290+
}
291+
else
292+
validate_backup_wal_from_start_to_stop(backup, (char *) archivedir, tli);
188293

294+
/* If recovery target is provided, ensure that archive exists. */
295+
if (dir_is_empty(archivedir)
296+
&& (TransactionIdIsValid(target_xid) || target_time != 0))
297+
elog(ERROR, "WAL archive is empty. You cannot restore backup to a recovery target without WAL archive.");
298+
299+
/*
300+
* Check if we have in archive all files needed to restore backup
301+
* up to the given recovery target.
302+
* In any case we cannot restore to the point before stop_lsn.
303+
*/
189304
private.archivedir = archivedir;
190305
private.tli = tli;
191306
xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
@@ -195,6 +310,15 @@ validate_wal(pgBackup *backup,
195310
/* We will check it in the end */
196311
xlogfpath[0] = '\0';
197312

313+
/* We can restore at least up to the backup end */
314+
time2iso(last_timestamp, lengthof(last_timestamp), backup->recovery_time);
315+
last_xid = backup->recovery_xid;
316+
317+
if ((TransactionIdIsValid(target_xid) && target_xid == last_xid)
318+
|| (target_time != 0 && backup->recovery_time >= target_time))
319+
all_wal = true;
320+
321+
startpoint = backup->stop_lsn;
198322
while (true)
199323
{
200324
bool timestamp_record;
@@ -208,10 +332,6 @@ validate_wal(pgBackup *backup,
208332
break;
209333
}
210334

211-
/* Got WAL record at stop_lsn */
212-
if (xlogreader->ReadRecPtr == backup->stop_lsn)
213-
got_endpoint = true;
214-
215335
timestamp_record = getRecordTimestamp(xlogreader, &last_time);
216336
if (XLogRecGetXid(xlogreader) != InvalidTransactionId)
217337
last_xid = XLogRecGetXid(xlogreader);
@@ -239,16 +359,9 @@ validate_wal(pgBackup *backup,
239359
startpoint = InvalidXLogRecPtr; /* continue reading at next record */
240360
}
241361

242-
243-
/* TODO Add comment */
244362
if (last_time > 0)
245363
time2iso(last_timestamp, lengthof(last_timestamp),
246364
timestamptz_to_time_t(last_time));
247-
else
248-
time2iso(last_timestamp, lengthof(last_timestamp),
249-
backup->recovery_time);
250-
if (last_xid == InvalidTransactionId)
251-
last_xid = backup->recovery_xid;
252365

253366
/* There are all needed WAL records */
254367
if (all_wal)
@@ -273,39 +386,21 @@ validate_wal(pgBackup *backup,
273386
}
274387
}
275388

276-
if (!got_endpoint)
277-
{
278-
/*
279-
* If we don't have WAL between start_lsn and stop_lsn,
280-
* the backup is definitely corrupted. Update its status.
281-
*/
282-
backup->status = BACKUP_STATUS_CORRUPT;
283-
pgBackupWriteBackupControlFile(backup);
284-
elog(ERROR, "there are not enough WAL records to restore from %X/%X to %X/%X",
285-
(uint32) (backup->start_lsn >> 32),
286-
(uint32) (backup->start_lsn),
287-
(uint32) (backup->stop_lsn >> 32),
288-
(uint32) (backup->stop_lsn));
289-
}
290-
else
291-
{
292-
if (target_time > 0)
293-
time2iso(target_timestamp, lengthof(target_timestamp),
294-
target_time);
295-
296-
elog(WARNING, "recovery can be done up to time %s and xid " XID_FMT,
297-
last_timestamp, last_xid);
298-
299-
if (TransactionIdIsValid(target_xid) && target_time != 0)
300-
elog(ERROR, "not enough WAL records to time %s and xid " XID_FMT,
301-
target_timestamp, target_xid);
302-
else if (TransactionIdIsValid(target_xid))
303-
elog(ERROR, "not enough WAL records to xid " XID_FMT,
304-
target_xid);
305-
else if (target_time != 0)
306-
elog(ERROR, "not enough WAL records to time %s",
307-
target_timestamp);
308-
}
389+
elog(WARNING, "recovery can be done up to time %s and xid " XID_FMT,
390+
last_timestamp, last_xid);
391+
392+
if (target_time > 0)
393+
time2iso(target_timestamp, lengthof(target_timestamp),
394+
target_time);
395+
if (TransactionIdIsValid(target_xid) && target_time != 0)
396+
elog(ERROR, "not enough WAL records to time %s and xid " XID_FMT,
397+
target_timestamp, target_xid);
398+
else if (TransactionIdIsValid(target_xid))
399+
elog(ERROR, "not enough WAL records to xid " XID_FMT,
400+
target_xid);
401+
else if (target_time != 0)
402+
elog(ERROR, "not enough WAL records to time %s",
403+
target_timestamp);
309404
}
310405

311406
/* clean */

0 commit comments

Comments
 (0)