Skip to content

Commit 0531831

Browse files
committed
In basebackup.c, refactor to create verify_page_checksum.
If checksum verification fails for a particular page, we reread the page and try one more time. The code that does this somewhat complex and difficult to follow. Move some of the logic into a new function and rearrange the code a bit to try to make it clearer. This way, we don't need the block_retry Boolean, a couple of other variables move from sendFile() into the new function, and some code is now less deeply indented. Patch by me, reviewed by David Steele. Discussion: http://postgr.es/m/CA+TgmoYt5jXH4U6cu1dm9Oe2FTn1aae6hBNhZzJJjyjbE_zYig@mail.gmail.com
1 parent a956bd3 commit 0531831

File tree

1 file changed

+104
-84
lines changed

1 file changed

+104
-84
lines changed

src/backend/backup/basebackup.c

Lines changed: 104 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
8383
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
8484
struct stat *statbuf, bool missing_ok, Oid dboid,
8585
backup_manifest_info *manifest, const char *spcoid);
86+
static bool verify_page_checksum(Page page, XLogRecPtr start_lsn,
87+
BlockNumber blkno,
88+
uint16 *expected_checksum);
8689
static void sendFileWithContent(bbsink *sink, const char *filename,
8790
const char *content,
8891
backup_manifest_info *manifest);
@@ -1485,14 +1488,11 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
14851488
{
14861489
int fd;
14871490
BlockNumber blkno = 0;
1488-
bool block_retry = false;
1489-
uint16 checksum;
14901491
int checksum_failures = 0;
14911492
off_t cnt;
14921493
int i;
14931494
pgoff_t len = 0;
14941495
char *page;
1495-
PageHeader phdr;
14961496
int segmentno = 0;
14971497
char *segmentpath;
14981498
bool verify_checksum = false;
@@ -1582,94 +1582,78 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
15821582
{
15831583
for (i = 0; i < cnt / BLCKSZ; i++)
15841584
{
1585+
int reread_cnt;
1586+
uint16 expected_checksum;
1587+
15851588
page = sink->bbs_buffer + BLCKSZ * i;
15861589

1590+
/* If the page is OK, go on to the next one. */
1591+
if (verify_page_checksum(page, sink->bbs_state->startptr,
1592+
blkno + i + segmentno * RELSEG_SIZE,
1593+
&expected_checksum))
1594+
continue;
1595+
15871596
/*
1588-
* Only check pages which have not been modified since the
1589-
* start of the base backup. Otherwise, they might have been
1590-
* written only halfway and the checksum would not be valid.
1591-
* However, replaying WAL would reinstate the correct page in
1592-
* this case. We also skip completely new pages, since they
1593-
* don't have a checksum yet.
1597+
* Retry the block on the first failure. It's possible that
1598+
* we read the first 4K page of the block just before postgres
1599+
* updated the entire block so it ends up looking torn to us.
1600+
* If, before we retry the read, the concurrent write of the
1601+
* block finishes, the page LSN will be updated and we'll
1602+
* realize that we should ignore this block.
1603+
*
1604+
* There's no guarantee that this will actually happen,
1605+
* though: the torn write could take an arbitrarily long time
1606+
* to complete. Retrying multiple times wouldn't fix this
1607+
* problem, either, though it would reduce the chances of it
1608+
* happening in practice. The only real fix here seems to be
1609+
* to have some kind of interlock that allows us to wait until
1610+
* we can be certain that no write to the block is in
1611+
* progress. Since we don't have any such thing right now, we
1612+
* just do this and hope for the best.
15941613
*/
1595-
if (!PageIsNew(page) && PageGetLSN(page) < sink->bbs_state->startptr)
1614+
reread_cnt =
1615+
basebackup_read_file(fd,
1616+
sink->bbs_buffer + BLCKSZ * i,
1617+
BLCKSZ, len + BLCKSZ * i,
1618+
readfilename,
1619+
false);
1620+
if (reread_cnt == 0)
15961621
{
1597-
checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1598-
phdr = (PageHeader) page;
1599-
if (phdr->pd_checksum != checksum)
1600-
{
1601-
/*
1602-
* Retry the block on the first failure. It's
1603-
* possible that we read the first 4K page of the
1604-
* block just before postgres updated the entire block
1605-
* so it ends up looking torn to us. If, before we
1606-
* retry the read, the concurrent write of the block
1607-
* finishes, the page LSN will be updated and we'll
1608-
* realize that we should ignore this block.
1609-
*
1610-
* There's no guarantee that this will actually
1611-
* happen, though: the torn write could take an
1612-
* arbitrarily long time to complete. Retrying
1613-
* multiple times wouldn't fix this problem, either,
1614-
* though it would reduce the chances of it happening
1615-
* in practice. The only real fix here seems to be to
1616-
* have some kind of interlock that allows us to wait
1617-
* until we can be certain that no write to the block
1618-
* is in progress. Since we don't have any such thing
1619-
* right now, we just do this and hope for the best.
1620-
*/
1621-
if (block_retry == false)
1622-
{
1623-
int reread_cnt;
1624-
1625-
/* Reread the failed block */
1626-
reread_cnt =
1627-
basebackup_read_file(fd,
1628-
sink->bbs_buffer + BLCKSZ * i,
1629-
BLCKSZ, len + BLCKSZ * i,
1630-
readfilename,
1631-
false);
1632-
if (reread_cnt == 0)
1633-
{
1634-
/*
1635-
* If we hit end-of-file, a concurrent
1636-
* truncation must have occurred, so break out
1637-
* of this loop just as if the initial fread()
1638-
* returned 0. We'll drop through to the same
1639-
* code that handles that case. (We must fix
1640-
* up cnt first, though.)
1641-
*/
1642-
cnt = BLCKSZ * i;
1643-
break;
1644-
}
1645-
1646-
/* Set flag so we know a retry was attempted */
1647-
block_retry = true;
1648-
1649-
/* Reset loop to validate the block again */
1650-
i--;
1651-
continue;
1652-
}
1653-
1654-
checksum_failures++;
1655-
1656-
if (checksum_failures <= 5)
1657-
ereport(WARNING,
1658-
(errmsg("checksum verification failed in "
1659-
"file \"%s\", block %u: calculated "
1660-
"%X but expected %X",
1661-
readfilename, blkno, checksum,
1662-
phdr->pd_checksum)));
1663-
if (checksum_failures == 5)
1664-
ereport(WARNING,
1665-
(errmsg("further checksum verification "
1666-
"failures in file \"%s\" will not "
1667-
"be reported", readfilename)));
1668-
}
1622+
/*
1623+
* If we hit end-of-file, a concurrent truncation must
1624+
* have occurred, so break out of this loop just as if the
1625+
* initial fread() returned 0. We'll drop through to the
1626+
* same code that handles that case. (We must fix up cnt
1627+
* first, though.)
1628+
*/
1629+
cnt = BLCKSZ * i;
1630+
break;
16691631
}
1670-
block_retry = false;
1671-
blkno++;
1632+
1633+
/* If the page now looks OK, go on to the next one. */
1634+
if (verify_page_checksum(page, sink->bbs_state->startptr,
1635+
blkno + i + segmentno * RELSEG_SIZE,
1636+
&expected_checksum))
1637+
continue;
1638+
1639+
/* Handle checksum failure. */
1640+
checksum_failures++;
1641+
if (checksum_failures <= 5)
1642+
ereport(WARNING,
1643+
(errmsg("checksum verification failed in "
1644+
"file \"%s\", block %u: calculated "
1645+
"%X but expected %X",
1646+
readfilename, blkno + i, expected_checksum,
1647+
((PageHeader) page)->pd_checksum)));
1648+
if (checksum_failures == 5)
1649+
ereport(WARNING,
1650+
(errmsg("further checksum verification "
1651+
"failures in file \"%s\" will not "
1652+
"be reported", readfilename)));
16721653
}
1654+
1655+
/* Update block number for next pass through the outer loop. */
1656+
blkno += i;
16731657
}
16741658

16751659
/*
@@ -1734,6 +1718,42 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
17341718
return true;
17351719
}
17361720

1721+
/*
1722+
* Try to verify the checksum for the provided page, if it seems appropriate
1723+
* to do so.
1724+
*
1725+
* Returns true if verification succeeds or if we decide not to check it,
1726+
* and false if verification fails. When return false, it also sets
1727+
* *expected_checksum to the computed value.
1728+
*/
1729+
static bool
1730+
verify_page_checksum(Page page, XLogRecPtr start_lsn, BlockNumber blkno,
1731+
uint16 *expected_checksum)
1732+
{
1733+
PageHeader phdr;
1734+
uint16 checksum;
1735+
1736+
/*
1737+
* Only check pages which have not been modified since the start of the
1738+
* base backup. Otherwise, they might have been written only halfway and
1739+
* the checksum would not be valid. However, replaying WAL would
1740+
* reinstate the correct page in this case. We also skip completely new
1741+
* pages, since they don't have a checksum yet.
1742+
*/
1743+
if (PageIsNew(page) || PageGetLSN(page) >= start_lsn)
1744+
return true;
1745+
1746+
/* Perform the actual checksum calculation. */
1747+
checksum = pg_checksum_page(page, blkno);
1748+
1749+
/* See whether it matches the value from the page. */
1750+
phdr = (PageHeader) page;
1751+
if (phdr->pd_checksum == checksum)
1752+
return true;
1753+
*expected_checksum = checksum;
1754+
return false;
1755+
}
1756+
17371757
static int64
17381758
_tarWriteHeader(bbsink *sink, const char *filename, const char *linktarget,
17391759
struct stat *statbuf, bool sizeonly)

0 commit comments

Comments
 (0)