Skip to content

Commit d75c402

Browse files
committed
Fix incremental backup interaction with XLOG_DBASE_CREATE_FILE_COPY.
After XLOG_DBASE_CREATE_FILE_COPY, a correct incremental backup needs to copy in full everything with the database and tablespace OID mentioned in that record; but that record doesn't specifically mention the blocks, or even the relfilenumbers, of the affected relations. As a result, we were failing to copy data that we should have copied. To fix, enter the DB OID and tablespace OID into the block reference table with relfilenumber 0 and limit block 0; and treat that as a limit block of 0 for every relfilenumber whose DB OID and tablespace OID match. Also, add a test case. Patch by me, reviewed by Noah Misch. Discussion: http://postgr.es/m/CA+Tgmob0xa=ByvGLMdAgkUZyVQE=r4nyYZ_VEa40FCfEDFnTKA@mail.gmail.com
1 parent cb6945d commit d75c402

File tree

4 files changed

+151
-1
lines changed

4 files changed

+151
-1
lines changed

src/backend/backup/basebackup_incremental.c

+17-1
Original file line numberDiff line numberDiff line change
@@ -777,9 +777,25 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
777777
return BACK_UP_FILE_FULLY;
778778
}
779779

780-
/* Look up the block reference table entry. */
780+
/*
781+
* Look up the special block reference table entry for the database as
782+
* a whole.
783+
*/
781784
rlocator.spcOid = spcoid;
782785
rlocator.dbOid = dboid;
786+
rlocator.relNumber = 0;
787+
if (BlockRefTableGetEntry(ib->brtab, &rlocator, MAIN_FORKNUM,
788+
&limit_block) != NULL)
789+
{
790+
/*
791+
* According to the WAL summary, this database OID/tablespace OID
792+
* pairing has been created since the previous backup. So, everything
793+
* in it must be backed up fully.
794+
*/
795+
return BACK_UP_FILE_FULLY;
796+
}
797+
798+
/* Look up the block reference table entry for this relfilenode. */
783799
rlocator.relNumber = relfilenumber;
784800
brtentry = BlockRefTableGetEntry(ib->brtab, &rlocator, forknum,
785801
&limit_block);

src/backend/postmaster/walsummarizer.c

+75
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "access/xlogutils.h"
3030
#include "backup/walsummary.h"
3131
#include "catalog/storage_xlog.h"
32+
#include "commands/dbcommands_xlog.h"
3233
#include "common/blkreftable.h"
3334
#include "libpq/pqsignal.h"
3435
#include "miscadmin.h"
@@ -146,6 +147,8 @@ static void HandleWalSummarizerInterrupts(void);
146147
static XLogRecPtr SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn,
147148
bool exact, XLogRecPtr switch_lsn,
148149
XLogRecPtr maximum_lsn);
150+
static void SummarizeDbaseRecord(XLogReaderState *xlogreader,
151+
BlockRefTable *brtab);
149152
static void SummarizeSmgrRecord(XLogReaderState *xlogreader,
150153
BlockRefTable *brtab);
151154
static void SummarizeXactRecord(XLogReaderState *xlogreader,
@@ -961,6 +964,9 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
961964
/* Special handling for particular types of WAL records. */
962965
switch (XLogRecGetRmid(xlogreader))
963966
{
967+
case RM_DBASE_ID:
968+
SummarizeDbaseRecord(xlogreader, brtab);
969+
break;
964970
case RM_SMGR_ID:
965971
SummarizeSmgrRecord(xlogreader, brtab);
966972
break;
@@ -1074,6 +1080,75 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
10741080
return summary_end_lsn;
10751081
}
10761082

1083+
/*
1084+
* Special handling for WAL records with RM_DBASE_ID.
1085+
*/
1086+
static void
1087+
SummarizeDbaseRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1088+
{
1089+
uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1090+
1091+
/*
1092+
* We use relfilenode zero for a given database OID and tablespace OID
1093+
* to indicate that all relations with that pair of IDs have been
1094+
* recreated if they exist at all. Effectively, we're setting a limit
1095+
* block of 0 for all such relfilenodes.
1096+
*
1097+
* Technically, this special handling is only needed in the case of
1098+
* XLOG_DBASE_CREATE_FILE_COPY, because that can create a whole bunch
1099+
* of relation files in a directory without logging anything
1100+
* specific to each one. If we didn't mark the whole DB OID/TS OID
1101+
* combination in some way, then a tablespace that was dropped after
1102+
* the reference backup and recreated using the FILE_COPY method prior
1103+
* to the incremental backup would look just like one that was never
1104+
* touched at all, which would be catastrophic.
1105+
*
1106+
* But it seems best to adopt this treatment for all records that drop
1107+
* or create a DB OID/TS OID combination. That's similar to how we
1108+
* treat the limit block for individual relations, and it's an extra
1109+
* layer of safety here. We can never lose data by marking more stuff
1110+
* as needing to be backed up in full.
1111+
*/
1112+
if (info == XLOG_DBASE_CREATE_FILE_COPY)
1113+
{
1114+
xl_dbase_create_file_copy_rec *xlrec;
1115+
RelFileLocator rlocator;
1116+
1117+
xlrec =
1118+
(xl_dbase_create_file_copy_rec *) XLogRecGetData(xlogreader);
1119+
rlocator.spcOid = xlrec->tablespace_id;
1120+
rlocator.dbOid = xlrec->db_id;
1121+
rlocator.relNumber = 0;
1122+
BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1123+
}
1124+
else if (info == XLOG_DBASE_CREATE_WAL_LOG)
1125+
{
1126+
xl_dbase_create_wal_log_rec *xlrec;
1127+
RelFileLocator rlocator;
1128+
1129+
xlrec = (xl_dbase_create_wal_log_rec *) XLogRecGetData(xlogreader);
1130+
rlocator.spcOid = xlrec->tablespace_id;
1131+
rlocator.dbOid = xlrec->db_id;
1132+
rlocator.relNumber = 0;
1133+
BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1134+
}
1135+
else if (info == XLOG_DBASE_DROP)
1136+
{
1137+
xl_dbase_drop_rec *xlrec;
1138+
RelFileLocator rlocator;
1139+
int i;
1140+
1141+
xlrec = (xl_dbase_drop_rec *) XLogRecGetData(xlogreader);
1142+
rlocator.dbOid = xlrec->db_id;
1143+
rlocator.relNumber = 0;
1144+
for (i = 0; i < xlrec->ntablespaces; ++i)
1145+
{
1146+
rlocator.spcOid = xlrec->tablespace_ids[i];
1147+
BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1148+
}
1149+
}
1150+
}
1151+
10771152
/*
10781153
* Special handling for WAL records with RM_SMGR_ID.
10791154
*/

src/bin/pg_combinebackup/meson.build

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ tests += {
3333
't/003_timeline.pl',
3434
't/004_manifest.pl',
3535
't/005_integrity.pl',
36+
't/006_db_file_copy.pl',
3637
],
3738
}
3839
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
2+
3+
use strict;
4+
use warnings FATAL => 'all';
5+
use File::Compare;
6+
use PostgreSQL::Test::Cluster;
7+
use PostgreSQL::Test::Utils;
8+
use Test::More;
9+
10+
# Set up a new database instance.
11+
my $primary = PostgreSQL::Test::Cluster->new('primary');
12+
$primary->init(has_archiving => 1, allows_streaming => 1);
13+
$primary->append_conf('postgresql.conf', 'summarize_wal = on');
14+
$primary->start;
15+
16+
# Initial setup.
17+
$primary->safe_psql('postgres', <<EOM);
18+
CREATE DATABASE lakh OID = 100000 STRATEGY = FILE_COPY
19+
EOM
20+
$primary->safe_psql('lakh', <<EOM);
21+
CREATE TABLE t1 (a int)
22+
EOM
23+
24+
# Take a full backup.
25+
my $backup1path = $primary->backup_dir . '/backup1';
26+
$primary->command_ok(
27+
[ 'pg_basebackup', '-D', $backup1path, '--no-sync', '-cfast' ],
28+
"full backup");
29+
30+
# Now make some database changes.
31+
$primary->safe_psql('postgres', <<EOM);
32+
DROP DATABASE lakh;
33+
CREATE DATABASE lakh OID = 100000 STRATEGY = FILE_COPY
34+
EOM
35+
36+
# Take an incremental backup.
37+
my $backup2path = $primary->backup_dir . '/backup2';
38+
$primary->command_ok(
39+
[ 'pg_basebackup', '-D', $backup2path, '--no-sync', '-cfast',
40+
'--incremental', $backup1path . '/backup_manifest' ],
41+
"incremental backup");
42+
43+
# Recover the incremental backup.
44+
my $restore = PostgreSQL::Test::Cluster->new('restore');
45+
$restore->init_from_backup($primary, 'backup2',
46+
combine_with_prior => [ 'backup1' ]);
47+
$restore->start();
48+
49+
# Query the DB.
50+
my $stdout;
51+
my $stderr;
52+
$restore->psql('lakh', 'SELECT * FROM t1',
53+
stdout => \$stdout, stderr => \$stderr);
54+
is($stdout, '', 'SELECT * FROM t1: no stdout');
55+
like($stderr, qr/relation "t1" does not exist/,
56+
'SELECT * FROM t1: stderr missing table');
57+
58+
done_testing();

0 commit comments

Comments
 (0)