Skip to content

Commit 79b5b10

Browse files
committed
Address set of issues with errno handling
System calls mixed up in error code paths are causing two issues which several code paths have not correctly handled: 1) For write() calls, sometimes the system may return less bytes than what has been written without errno being set. Some paths were careful enough to consider that case, and assumed that errno should be set to ENOSPC, other calls missed that. 2) errno generated by a system call is overwritten by other system calls which may succeed once an error code path is taken, causing what is reported to the user to be incorrect. This patch uses the brute-force approach of correcting all those code paths. Some refactoring could happen in the future, but this is let as future work, which is not targeted for back-branches anyway. Author: Michael Paquier Reviewed-by: Ashutosh Sharma Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
1 parent 21ba0b4 commit 79b5b10

File tree

8 files changed

+82
-3
lines changed

8 files changed

+82
-3
lines changed

src/backend/access/heap/rewriteheap.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,9 +1173,14 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
11731173

11741174
/* write out tail end of mapping file (again) */
11751175
if (write(fd, data, len) != len)
1176+
{
1177+
/* if write didn't set errno, assume problem is no disk space */
1178+
if (errno == 0)
1179+
errno = ENOSPC;
11761180
ereport(ERROR,
11771181
(errcode_for_file_access(),
11781182
errmsg("could not write to file \"%s\": %m", path)));
1183+
}
11791184

11801185
/*
11811186
* Now fsync all previously written data. We could improve things and only

src/backend/access/transam/twophase.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1238,12 +1238,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12381238
*/
12391239
if (fstat(fd, &stat))
12401240
{
1241+
int save_errno = errno;
1242+
12411243
CloseTransientFile(fd);
12421244
if (give_warnings)
1245+
{
1246+
errno = save_errno;
12431247
ereport(WARNING,
12441248
(errcode_for_file_access(),
12451249
errmsg("could not stat two-phase state file \"%s\": %m",
12461250
path)));
1251+
}
12471252
return NULL;
12481253
}
12491254

@@ -1270,12 +1275,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12701275

12711276
if (read(fd, buf, stat.st_size) != stat.st_size)
12721277
{
1278+
int save_errno = errno;
1279+
12731280
CloseTransientFile(fd);
12741281
if (give_warnings)
1282+
{
1283+
errno = save_errno;
12751284
ereport(WARNING,
12761285
(errcode_for_file_access(),
12771286
errmsg("could not read two-phase state file \"%s\": %m",
12781287
path)));
1288+
}
12791289
pfree(buf);
12801290
return NULL;
12811291
}
@@ -1563,14 +1573,24 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
15631573
/* Write content and CRC */
15641574
if (write(fd, content, len) != len)
15651575
{
1576+
int save_errno = errno;
1577+
15661578
CloseTransientFile(fd);
1579+
1580+
/* if write didn't set errno, assume problem is no disk space */
1581+
errno = save_errno ? save_errno : ENOSPC;
15671582
ereport(ERROR,
15681583
(errcode_for_file_access(),
15691584
errmsg("could not write two-phase state file: %m")));
15701585
}
15711586
if (write(fd, &statefile_crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
15721587
{
1588+
int save_errno = errno;
1589+
15731590
CloseTransientFile(fd);
1591+
1592+
/* if write didn't set errno, assume problem is no disk space */
1593+
errno = save_errno ? save_errno : ENOSPC;
15741594
ereport(ERROR,
15751595
(errcode_for_file_access(),
15761596
errmsg("could not write two-phase state file: %m")));
@@ -1582,7 +1602,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
15821602
*/
15831603
if (pg_fsync(fd) != 0)
15841604
{
1605+
int save_errno = errno;
1606+
15851607
CloseTransientFile(fd);
1608+
errno = save_errno;
15861609
ereport(ERROR,
15871610
(errcode_for_file_access(),
15881611
errmsg("could not fsync two-phase state file: %m")));

src/backend/access/transam/xlog.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,7 +3231,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
32313231

32323232
if (pg_fsync(fd) != 0)
32333233
{
3234+
int save_errno = errno;
3235+
32343236
close(fd);
3237+
errno = save_errno;
32353238
ereport(ERROR,
32363239
(errcode_for_file_access(),
32373240
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -10970,8 +10973,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1097010973
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
1097110974
{
1097210975
char fname[MAXFNAMELEN];
10976+
int save_errno = errno;
1097310977

1097410978
XLogFileName(fname, curFileTLI, readSegNo);
10979+
errno = save_errno;
1097510980
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1097610981
(errcode_for_file_access(),
1097710982
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -10982,8 +10987,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1098210987
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1098310988
{
1098410989
char fname[MAXFNAMELEN];
10990+
int save_errno = errno;
1098510991

1098610992
XLogFileName(fname, curFileTLI, readSegNo);
10993+
errno = save_errno;
1098710994
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1098810995
(errcode_for_file_access(),
1098910996
errmsg("could not read from log segment %s, offset %u: %m",

src/backend/replication/basebackup.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,13 +448,16 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
448448
fp = AllocateFile(pathbuf, "rb");
449449
if (fp == NULL)
450450
{
451+
int save_errno = errno;
452+
451453
/*
452454
* Most likely reason for this is that the file was already
453455
* removed by a checkpoint, so check for that to get a better
454456
* error message.
455457
*/
456458
CheckXLogRemoved(segno, tli);
457459

460+
errno = save_errno;
458461
ereport(ERROR,
459462
(errcode_for_file_access(),
460463
errmsg("could not open file \"%s\": %m", pathbuf)));

src/backend/replication/logical/reorderbuffer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2170,7 +2170,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
21702170
int save_errno = errno;
21712171

21722172
CloseTransientFile(fd);
2173-
errno = save_errno;
2173+
2174+
/* if write didn't set errno, assume problem is no disk space */
2175+
errno = save_errno ? save_errno : ENOSPC;
21742176
ereport(ERROR,
21752177
(errcode_for_file_access(),
21762178
errmsg("could not write to data file for XID %u: %m",

src/backend/replication/logical/snapbuild.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,7 +1566,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
15661566

15671567
if ((write(fd, ondisk, needed_length)) != needed_length)
15681568
{
1569+
int save_errno = errno;
1570+
15691571
CloseTransientFile(fd);
1572+
1573+
/* if write didn't set errno, assume problem is no disk space */
1574+
errno = save_errno ? save_errno : ENOSPC;
15701575
ereport(ERROR,
15711576
(errcode_for_file_access(),
15721577
errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1582,7 +1587,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
15821587
*/
15831588
if (pg_fsync(fd) != 0)
15841589
{
1590+
int save_errno = errno;
1591+
15851592
CloseTransientFile(fd);
1593+
errno = save_errno;
15861594
ereport(ERROR,
15871595
(errcode_for_file_access(),
15881596
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1664,7 +1672,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
16641672
readBytes = read(fd, &ondisk, SnapBuildOnDiskConstantSize);
16651673
if (readBytes != SnapBuildOnDiskConstantSize)
16661674
{
1675+
int save_errno = errno;
1676+
16671677
CloseTransientFile(fd);
1678+
errno = save_errno;
16681679
ereport(ERROR,
16691680
(errcode_for_file_access(),
16701681
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1690,7 +1701,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
16901701
readBytes = read(fd, &ondisk.builder, sizeof(SnapBuild));
16911702
if (readBytes != sizeof(SnapBuild))
16921703
{
1704+
int save_errno = errno;
1705+
16931706
CloseTransientFile(fd);
1707+
errno = save_errno;
16941708
ereport(ERROR,
16951709
(errcode_for_file_access(),
16961710
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1705,7 +1719,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17051719
readBytes = read(fd, ondisk.builder.was_running.was_xip, sz);
17061720
if (readBytes != sz)
17071721
{
1722+
int save_errno = errno;
1723+
17081724
CloseTransientFile(fd);
1725+
errno = save_errno;
17091726
ereport(ERROR,
17101727
(errcode_for_file_access(),
17111728
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1719,7 +1736,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17191736
readBytes = read(fd, ondisk.builder.committed.xip, sz);
17201737
if (readBytes != sz)
17211738
{
1739+
int save_errno = errno;
1740+
17221741
CloseTransientFile(fd);
1742+
errno = save_errno;
17231743
ereport(ERROR,
17241744
(errcode_for_file_access(),
17251745
errmsg("could not read file \"%s\", read %d of %d: %m",

src/backend/replication/slot.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1031,7 +1031,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
10311031
int save_errno = errno;
10321032

10331033
CloseTransientFile(fd);
1034-
errno = save_errno;
1034+
1035+
/* if write didn't set errno, assume problem is no disk space */
1036+
errno = save_errno ? save_errno : ENOSPC;
10351037
ereport(elevel,
10361038
(errcode_for_file_access(),
10371039
errmsg("could not write to file \"%s\": %m",
@@ -1134,7 +1136,10 @@ RestoreSlotFromDisk(const char *name)
11341136
*/
11351137
if (pg_fsync(fd) != 0)
11361138
{
1139+
int save_errno = errno;
1140+
11371141
CloseTransientFile(fd);
1142+
errno = save_errno;
11381143
ereport(PANIC,
11391144
(errcode_for_file_access(),
11401145
errmsg("could not fsync file \"%s\": %m",

src/bin/pg_basebackup/receivelog.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
139139
{
140140
if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
141141
{
142+
/* if write didn't set errno, assume problem is no disk space */
143+
if (errno == 0)
144+
errno = ENOSPC;
142145
fprintf(stderr,
143146
_("%s: could not pad transaction log file \"%s\": %s\n"),
144147
progname, fn, strerror(errno));
@@ -325,7 +328,9 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
325328
*/
326329
close(fd);
327330
unlink(tmppath);
328-
errno = save_errno;
331+
332+
/* if write didn't set errno, assume problem is no disk space */
333+
errno = save_errno ? save_errno : ENOSPC;
329334

330335
fprintf(stderr, _("%s: could not write timeline history file \"%s\": %s\n"),
331336
progname, tmppath, strerror(errno));
@@ -334,7 +339,10 @@ writeTimeLineHistoryFile(char *basedir, TimeLineID tli, char *filename,
334339

335340
if (fsync(fd) != 0)
336341
{
342+
int save_errno = errno;
343+
337344
close(fd);
345+
errno = save_errno;
338346
fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
339347
progname, tmppath, strerror(errno));
340348
return false;
@@ -1090,6 +1098,12 @@ HandleCopyStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline,
10901098
copybuf + hdr_len + bytes_written,
10911099
bytes_to_write) != bytes_to_write)
10921100
{
1101+
/*
1102+
* If write didn't set errno, assume problem is no disk
1103+
* space.
1104+
*/
1105+
if (errno == 0)
1106+
errno = ENOSPC;
10931107
fprintf(stderr,
10941108
_("%s: could not write %u bytes to WAL file \"%s\": %s\n"),
10951109
progname, bytes_to_write, current_walfile_name,

0 commit comments

Comments
 (0)