@@ -83,6 +83,9 @@ static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeo
83
83
static bool sendFile (bbsink * sink , const char * readfilename , const char * tarfilename ,
84
84
struct stat * statbuf , bool missing_ok , Oid dboid ,
85
85
backup_manifest_info * manifest , const char * spcoid );
86
+ static bool verify_page_checksum (Page page , XLogRecPtr start_lsn ,
87
+ BlockNumber blkno ,
88
+ uint16 * expected_checksum );
86
89
static void sendFileWithContent (bbsink * sink , const char * filename ,
87
90
const char * content ,
88
91
backup_manifest_info * manifest );
@@ -1485,14 +1488,11 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
1485
1488
{
1486
1489
int fd ;
1487
1490
BlockNumber blkno = 0 ;
1488
- bool block_retry = false;
1489
- uint16 checksum ;
1490
1491
int checksum_failures = 0 ;
1491
1492
off_t cnt ;
1492
1493
int i ;
1493
1494
pgoff_t len = 0 ;
1494
1495
char * page ;
1495
- PageHeader phdr ;
1496
1496
int segmentno = 0 ;
1497
1497
char * segmentpath ;
1498
1498
bool verify_checksum = false;
@@ -1582,94 +1582,78 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
1582
1582
{
1583
1583
for (i = 0 ; i < cnt / BLCKSZ ; i ++ )
1584
1584
{
1585
+ int reread_cnt ;
1586
+ uint16 expected_checksum ;
1587
+
1585
1588
page = sink -> bbs_buffer + BLCKSZ * i ;
1586
1589
1590
+ /* If the page is OK, go on to the next one. */
1591
+ if (verify_page_checksum (page , sink -> bbs_state -> startptr ,
1592
+ blkno + i + segmentno * RELSEG_SIZE ,
1593
+ & expected_checksum ))
1594
+ continue ;
1595
+
1587
1596
/*
1588
- * Only check pages which have not been modified since the
1589
- * start of the base backup. Otherwise, they might have been
1590
- * written only halfway and the checksum would not be valid.
1591
- * However, replaying WAL would reinstate the correct page in
1592
- * this case. We also skip completely new pages, since they
1593
- * don't have a checksum yet.
1597
+ * Retry the block on the first failure. It's possible that
1598
+ * we read the first 4K page of the block just before postgres
1599
+ * updated the entire block so it ends up looking torn to us.
1600
+ * If, before we retry the read, the concurrent write of the
1601
+ * block finishes, the page LSN will be updated and we'll
1602
+ * realize that we should ignore this block.
1603
+ *
1604
+ * There's no guarantee that this will actually happen,
1605
+ * though: the torn write could take an arbitrarily long time
1606
+ * to complete. Retrying multiple times wouldn't fix this
1607
+ * problem, either, though it would reduce the chances of it
1608
+ * happening in practice. The only real fix here seems to be
1609
+ * to have some kind of interlock that allows us to wait until
1610
+ * we can be certain that no write to the block is in
1611
+ * progress. Since we don't have any such thing right now, we
1612
+ * just do this and hope for the best.
1594
1613
*/
1595
- if (!PageIsNew (page ) && PageGetLSN (page ) < sink -> bbs_state -> startptr )
1614
+ reread_cnt =
1615
+ basebackup_read_file (fd ,
1616
+ sink -> bbs_buffer + BLCKSZ * i ,
1617
+ BLCKSZ , len + BLCKSZ * i ,
1618
+ readfilename ,
1619
+ false);
1620
+ if (reread_cnt == 0 )
1596
1621
{
1597
- checksum = pg_checksum_page ((char * ) page , blkno + segmentno * RELSEG_SIZE );
1598
- phdr = (PageHeader ) page ;
1599
- if (phdr -> pd_checksum != checksum )
1600
- {
1601
- /*
1602
- * Retry the block on the first failure. It's
1603
- * possible that we read the first 4K page of the
1604
- * block just before postgres updated the entire block
1605
- * so it ends up looking torn to us. If, before we
1606
- * retry the read, the concurrent write of the block
1607
- * finishes, the page LSN will be updated and we'll
1608
- * realize that we should ignore this block.
1609
- *
1610
- * There's no guarantee that this will actually
1611
- * happen, though: the torn write could take an
1612
- * arbitrarily long time to complete. Retrying
1613
- * multiple times wouldn't fix this problem, either,
1614
- * though it would reduce the chances of it happening
1615
- * in practice. The only real fix here seems to be to
1616
- * have some kind of interlock that allows us to wait
1617
- * until we can be certain that no write to the block
1618
- * is in progress. Since we don't have any such thing
1619
- * right now, we just do this and hope for the best.
1620
- */
1621
- if (block_retry == false)
1622
- {
1623
- int reread_cnt ;
1624
-
1625
- /* Reread the failed block */
1626
- reread_cnt =
1627
- basebackup_read_file (fd ,
1628
- sink -> bbs_buffer + BLCKSZ * i ,
1629
- BLCKSZ , len + BLCKSZ * i ,
1630
- readfilename ,
1631
- false);
1632
- if (reread_cnt == 0 )
1633
- {
1634
- /*
1635
- * If we hit end-of-file, a concurrent
1636
- * truncation must have occurred, so break out
1637
- * of this loop just as if the initial fread()
1638
- * returned 0. We'll drop through to the same
1639
- * code that handles that case. (We must fix
1640
- * up cnt first, though.)
1641
- */
1642
- cnt = BLCKSZ * i ;
1643
- break ;
1644
- }
1645
-
1646
- /* Set flag so we know a retry was attempted */
1647
- block_retry = true;
1648
-
1649
- /* Reset loop to validate the block again */
1650
- i -- ;
1651
- continue ;
1652
- }
1653
-
1654
- checksum_failures ++ ;
1655
-
1656
- if (checksum_failures <= 5 )
1657
- ereport (WARNING ,
1658
- (errmsg ("checksum verification failed in "
1659
- "file \"%s\", block %u: calculated "
1660
- "%X but expected %X" ,
1661
- readfilename , blkno , checksum ,
1662
- phdr -> pd_checksum )));
1663
- if (checksum_failures == 5 )
1664
- ereport (WARNING ,
1665
- (errmsg ("further checksum verification "
1666
- "failures in file \"%s\" will not "
1667
- "be reported" , readfilename )));
1668
- }
1622
+ /*
1623
+ * If we hit end-of-file, a concurrent truncation must
1624
+ * have occurred, so break out of this loop just as if the
1625
+ * initial fread() returned 0. We'll drop through to the
1626
+ * same code that handles that case. (We must fix up cnt
1627
+ * first, though.)
1628
+ */
1629
+ cnt = BLCKSZ * i ;
1630
+ break ;
1669
1631
}
1670
- block_retry = false;
1671
- blkno ++ ;
1632
+
1633
+ /* If the page now looks OK, go on to the next one. */
1634
+ if (verify_page_checksum (page , sink -> bbs_state -> startptr ,
1635
+ blkno + i + segmentno * RELSEG_SIZE ,
1636
+ & expected_checksum ))
1637
+ continue ;
1638
+
1639
+ /* Handle checksum failure. */
1640
+ checksum_failures ++ ;
1641
+ if (checksum_failures <= 5 )
1642
+ ereport (WARNING ,
1643
+ (errmsg ("checksum verification failed in "
1644
+ "file \"%s\", block %u: calculated "
1645
+ "%X but expected %X" ,
1646
+ readfilename , blkno + i , expected_checksum ,
1647
+ ((PageHeader ) page )-> pd_checksum )));
1648
+ if (checksum_failures == 5 )
1649
+ ereport (WARNING ,
1650
+ (errmsg ("further checksum verification "
1651
+ "failures in file \"%s\" will not "
1652
+ "be reported" , readfilename )));
1672
1653
}
1654
+
1655
+ /* Update block number for next pass through the outer loop. */
1656
+ blkno += i ;
1673
1657
}
1674
1658
1675
1659
/*
@@ -1734,6 +1718,42 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
1734
1718
return true;
1735
1719
}
1736
1720
1721
+ /*
1722
+ * Try to verify the checksum for the provided page, if it seems appropriate
1723
+ * to do so.
1724
+ *
1725
+ * Returns true if verification succeeds or if we decide not to check it,
1726
+ * and false if verification fails. When return false, it also sets
1727
+ * *expected_checksum to the computed value.
1728
+ */
1729
+ static bool
1730
+ verify_page_checksum (Page page , XLogRecPtr start_lsn , BlockNumber blkno ,
1731
+ uint16 * expected_checksum )
1732
+ {
1733
+ PageHeader phdr ;
1734
+ uint16 checksum ;
1735
+
1736
+ /*
1737
+ * Only check pages which have not been modified since the start of the
1738
+ * base backup. Otherwise, they might have been written only halfway and
1739
+ * the checksum would not be valid. However, replaying WAL would
1740
+ * reinstate the correct page in this case. We also skip completely new
1741
+ * pages, since they don't have a checksum yet.
1742
+ */
1743
+ if (PageIsNew (page ) || PageGetLSN (page ) >= start_lsn )
1744
+ return true;
1745
+
1746
+ /* Perform the actual checksum calculation. */
1747
+ checksum = pg_checksum_page (page , blkno );
1748
+
1749
+ /* See whether it matches the value from the page. */
1750
+ phdr = (PageHeader ) page ;
1751
+ if (phdr -> pd_checksum == checksum )
1752
+ return true;
1753
+ * expected_checksum = checksum ;
1754
+ return false;
1755
+ }
1756
+
1737
1757
static int64
1738
1758
_tarWriteHeader (bbsink * sink , const char * filename , const char * linktarget ,
1739
1759
struct stat * statbuf , bool sizeonly )
0 commit comments