Skip to content

Commit 2fd2eff

Browse files
committed
Improve server code to read files as part of a base backup.
Don't use fread(), since that doesn't necessarily set errno. We could use read() instead, but it's even better to use pg_pread(), which allows us to avoid some extra calls to seek to the desired location in the file. Also, advertise a wait event while reading from a file, as we do for most other places where we're reading data from files. Patch by me, reviewed by Hamid Akhtar. Discussion: http://postgr.es/m/CA+TgmobBw-3573vMosGj06r72ajHsYeKtksT_oTxH8XvTL7DxA@mail.gmail.com
1 parent 453e0e3 commit 2fd2eff

File tree

4 files changed

+86
-67
lines changed

4 files changed

+86
-67
lines changed

doc/src/sgml/monitoring.sgml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,6 +1193,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
11931193
</thead>
11941194

11951195
<tbody>
1196+
<row>
1197+
<entry><literal>BaseBackupRead</literal></entry>
1198+
<entry>Waiting for base backup to read from a file.</entry>
1199+
</row>
11961200
<row>
11971201
<entry><literal>BufFileRead</literal></entry>
11981202
<entry>Waiting for a read from a buffered file.</entry>

src/backend/postmaster/pgstat.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3931,6 +3931,9 @@ pgstat_get_wait_io(WaitEventIO w)
39313931

39323932
switch (w)
39333933
{
3934+
case WAIT_EVENT_BASEBACKUP_READ:
3935+
event_name = "BaseBackupRead";
3936+
break;
39343937
case WAIT_EVENT_BUFFILE_READ:
39353938
event_name = "BufFileRead";
39363939
break;

src/backend/replication/basebackup.c

Lines changed: 77 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ static int compareWalFileNames(const ListCell *a, const ListCell *b);
8181
static void throttle(size_t increment);
8282
static void update_basebackup_progress(int64 delta);
8383
static bool is_checksummed_file(const char *fullpath, const char *filename);
84+
static int basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
85+
const char *filename, bool partial_read_ok);
8486

8587
/* Was the backup currently in-progress initiated in recovery mode? */
8688
static bool backup_started_in_recovery = false;
@@ -98,18 +100,6 @@ static char *statrelpath = NULL;
98100
*/
99101
#define THROTTLING_FREQUENCY 8
100102

101-
/*
102-
* Checks whether we encountered any error in fread(). fread() doesn't give
103-
* any clue what has happened, so we check with ferror(). Also, neither
104-
* fread() nor ferror() set errno, so we just throw a generic error.
105-
*/
106-
#define CHECK_FREAD_ERROR(fp, filename) \
107-
do { \
108-
if (ferror(fp)) \
109-
ereport(ERROR, \
110-
(errmsg("could not read from file \"%s\"", filename))); \
111-
} while (0)
112-
113103
/* The actual number of bytes, transfer of which may cause sleep. */
114104
static uint64 throttling_sample;
115105

@@ -600,16 +590,16 @@ perform_base_backup(basebackup_options *opt)
600590
foreach(lc, walFileList)
601591
{
602592
char *walFileName = (char *) lfirst(lc);
603-
FILE *fp;
593+
int fd;
604594
char buf[TAR_SEND_SIZE];
605595
size_t cnt;
606596
pgoff_t len = 0;
607597

608598
snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFileName);
609599
XLogFromFileName(walFileName, &tli, &segno, wal_segment_size);
610600

611-
fp = AllocateFile(pathbuf, "rb");
612-
if (fp == NULL)
601+
fd = OpenTransientFile(pathbuf, O_RDONLY | PG_BINARY);
602+
if (fd < 0)
613603
{
614604
int save_errno = errno;
615605

@@ -626,7 +616,7 @@ perform_base_backup(basebackup_options *opt)
626616
errmsg("could not open file \"%s\": %m", pathbuf)));
627617
}
628618

629-
if (fstat(fileno(fp), &statbuf) != 0)
619+
if (fstat(fd, &statbuf) != 0)
630620
ereport(ERROR,
631621
(errcode_for_file_access(),
632622
errmsg("could not stat file \"%s\": %m",
@@ -642,9 +632,10 @@ perform_base_backup(basebackup_options *opt)
642632
/* send the WAL file itself */
643633
_tarWriteHeader(pathbuf, NULL, &statbuf, false);
644634

645-
while ((cnt = fread(buf, 1,
646-
Min(sizeof(buf), wal_segment_size - len),
647-
fp)) > 0)
635+
while ((cnt = basebackup_read_file(fd, buf,
636+
Min(sizeof(buf),
637+
wal_segment_size - len),
638+
len, pathbuf, true)) > 0)
648639
{
649640
CheckXLogRemoved(segno, tli);
650641
/* Send the chunk as a CopyData message */
@@ -660,8 +651,6 @@ perform_base_backup(basebackup_options *opt)
660651
break;
661652
}
662653

663-
CHECK_FREAD_ERROR(fp, pathbuf);
664-
665654
if (len != wal_segment_size)
666655
{
667656
CheckXLogRemoved(segno, tli);
@@ -676,7 +665,7 @@ perform_base_backup(basebackup_options *opt)
676665
*/
677666
Assert(wal_segment_size % TAR_BLOCK_SIZE == 0);
678667

679-
FreeFile(fp);
668+
CloseTransientFile(fd);
680669

681670
/*
682671
* Mark file as archived, otherwise files can get archived again
@@ -1575,7 +1564,7 @@ sendFile(const char *readfilename, const char *tarfilename,
15751564
struct stat *statbuf, bool missing_ok, Oid dboid,
15761565
backup_manifest_info *manifest, const char *spcoid)
15771566
{
1578-
FILE *fp;
1567+
int fd;
15791568
BlockNumber blkno = 0;
15801569
bool block_retry = false;
15811570
char buf[TAR_SEND_SIZE];
@@ -1594,8 +1583,8 @@ sendFile(const char *readfilename, const char *tarfilename,
15941583

15951584
pg_checksum_init(&checksum_ctx, manifest->checksum_type);
15961585

1597-
fp = AllocateFile(readfilename, "rb");
1598-
if (fp == NULL)
1586+
fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
1587+
if (fd < 0)
15991588
{
16001589
if (errno == ENOENT && missing_ok)
16011590
return false;
@@ -1637,8 +1626,27 @@ sendFile(const char *readfilename, const char *tarfilename,
16371626
}
16381627
}
16391628

1640-
while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1629+
/*
1630+
* Loop until we read the amount of data the caller told us to expect. The
1631+
* file could be longer, if it was extended while we were sending it, but
1632+
* for a base backup we can ignore such extended data. It will be restored
1633+
* from WAL.
1634+
*/
1635+
while (len < statbuf->st_size)
16411636
{
1637+
/* Try to read some more data. */
1638+
cnt = basebackup_read_file(fd, buf,
1639+
Min(sizeof(buf), statbuf->st_size - len),
1640+
len, readfilename, true);
1641+
1642+
/*
1643+
* If we hit end-of-file, a concurrent truncation must have occurred.
1644+
* That's not an error condition, because WAL replay will fix things
1645+
* up.
1646+
*/
1647+
if (cnt == 0)
1648+
break;
1649+
16421650
/*
16431651
* The checksums are verified at block level, so we iterate over the
16441652
* buffer in chunks of BLCKSZ, after making sure that
@@ -1689,16 +1697,15 @@ sendFile(const char *readfilename, const char *tarfilename,
16891697
*/
16901698
if (block_retry == false)
16911699
{
1692-
/* Reread the failed block */
1693-
if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1694-
{
1695-
ereport(ERROR,
1696-
(errcode_for_file_access(),
1697-
errmsg("could not fseek in file \"%s\": %m",
1698-
readfilename)));
1699-
}
1700+
int reread_cnt;
17001701

1701-
if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1702+
/* Reread the failed block */
1703+
reread_cnt =
1704+
basebackup_read_file(fd, buf + BLCKSZ * i,
1705+
BLCKSZ, len + BLCKSZ * i,
1706+
readfilename,
1707+
false);
1708+
if (reread_cnt == 0)
17021709
{
17031710
/*
17041711
* If we hit end-of-file, a concurrent
@@ -1708,24 +1715,8 @@ sendFile(const char *readfilename, const char *tarfilename,
17081715
* code that handles that case. (We must fix
17091716
* up cnt first, though.)
17101717
*/
1711-
if (feof(fp))
1712-
{
1713-
cnt = BLCKSZ * i;
1714-
break;
1715-
}
1716-
1717-
ereport(ERROR,
1718-
(errcode_for_file_access(),
1719-
errmsg("could not reread block %d of file \"%s\": %m",
1720-
blkno, readfilename)));
1721-
}
1722-
1723-
if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1724-
{
1725-
ereport(ERROR,
1726-
(errcode_for_file_access(),
1727-
errmsg("could not fseek in file \"%s\": %m",
1728-
readfilename)));
1718+
cnt = BLCKSZ * i;
1719+
break;
17291720
}
17301721

17311722
/* Set flag so we know a retry was attempted */
@@ -1768,20 +1759,8 @@ sendFile(const char *readfilename, const char *tarfilename,
17681759

17691760
len += cnt;
17701761
throttle(cnt);
1771-
1772-
if (feof(fp) || len >= statbuf->st_size)
1773-
{
1774-
/*
1775-
* Reached end of file. The file could be longer, if it was
1776-
* extended while we were sending it, but for a base backup we can
1777-
* ignore such extended data. It will be restored from WAL.
1778-
*/
1779-
break;
1780-
}
17811762
}
17821763

1783-
CHECK_FREAD_ERROR(fp, readfilename);
1784-
17851764
/* If the file was truncated while we were sending it, pad it with zeros */
17861765
if (len < statbuf->st_size)
17871766
{
@@ -1810,7 +1789,7 @@ sendFile(const char *readfilename, const char *tarfilename,
18101789
update_basebackup_progress(pad);
18111790
}
18121791

1813-
FreeFile(fp);
1792+
CloseTransientFile(fd);
18141793

18151794
if (checksum_failures > 1)
18161795
{
@@ -1996,3 +1975,35 @@ update_basebackup_progress(int64 delta)
19961975

19971976
pgstat_progress_update_multi_param(nparam, index, val);
19981977
}
1978+
1979+
/*
1980+
* Read some data from a file, setting a wait event and reporting any error
1981+
* encountered.
1982+
*
1983+
* If partial_read_ok is false, also report an error if the number of bytes
1984+
* read is not equal to the number of bytes requested.
1985+
*
1986+
* Returns the number of bytes read.
1987+
*/
1988+
static int
1989+
basebackup_read_file(int fd, char *buf, size_t nbytes, off_t offset,
1990+
const char *filename, bool partial_read_ok)
1991+
{
1992+
int rc;
1993+
1994+
pgstat_report_wait_start(WAIT_EVENT_BASEBACKUP_READ);
1995+
rc = pg_pread(fd, buf, nbytes, offset);
1996+
pgstat_report_wait_end();
1997+
1998+
if (rc < 0)
1999+
ereport(ERROR,
2000+
(errcode_for_file_access(),
2001+
errmsg("could not read file \"%s\": %m", filename)));
2002+
if (!partial_read_ok && rc > 0 && rc != nbytes)
2003+
ereport(ERROR,
2004+
(errcode_for_file_access(),
2005+
errmsg("could not read file \"%s\": read %d of %zu",
2006+
filename, rc, nbytes)));
2007+
2008+
return rc;
2009+
}

src/include/pgstat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -913,7 +913,8 @@ typedef enum
913913
*/
914914
typedef enum
915915
{
916-
WAIT_EVENT_BUFFILE_READ = PG_WAIT_IO,
916+
WAIT_EVENT_BASEBACKUP_READ = PG_WAIT_IO,
917+
WAIT_EVENT_BUFFILE_READ,
917918
WAIT_EVENT_BUFFILE_WRITE,
918919
WAIT_EVENT_CONTROL_FILE_READ,
919920
WAIT_EVENT_CONTROL_FILE_SYNC,

0 commit comments

Comments
 (0)