Skip to content

Commit 6eec672

Browse files
committed
Address set of issues with errno handling
System calls mixed up in error code paths are causing two issues which several code paths have not correctly handled: 1) For write() calls, sometimes the system may return less bytes than what has been written without errno being set. Some paths were careful enough to consider that case, and assumed that errno should be set to ENOSPC, other calls missed that. 2) errno generated by a system call is overwritten by other system calls which may succeed once an error code path is taken, causing what is reported to the user to be incorrect. This patch uses the brute-force approach of correcting all those code paths. Some refactoring could happen in the future, but this is let as future work, which is not targeted for back-branches anyway. Author: Michael Paquier Reviewed-by: Ashutosh Sharma Discussion: https://postgr.es/m/20180622061535.GD5215@paquier.xyz
1 parent 6350dcc commit 6eec672

File tree

10 files changed

+118
-4
lines changed

10 files changed

+118
-4
lines changed

src/backend/access/heap/rewriteheap.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,9 +1169,14 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
11691169
/* write out tail end of mapping file (again) */
11701170
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
11711171
if (write(fd, data, len) != len)
1172+
{
1173+
/* if write didn't set errno, assume problem is no disk space */
1174+
if (errno == 0)
1175+
errno = ENOSPC;
11721176
ereport(ERROR,
11731177
(errcode_for_file_access(),
11741178
errmsg("could not write to file \"%s\": %m", path)));
1179+
}
11751180
pgstat_report_wait_end();
11761181

11771182
/*

src/backend/access/transam/twophase.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,12 +1214,17 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12141214
*/
12151215
if (fstat(fd, &stat))
12161216
{
1217+
int save_errno = errno;
1218+
12171219
CloseTransientFile(fd);
12181220
if (give_warnings)
1221+
{
1222+
errno = save_errno;
12191223
ereport(WARNING,
12201224
(errcode_for_file_access(),
12211225
errmsg("could not stat two-phase state file \"%s\": %m",
12221226
path)));
1227+
}
12231228
return NULL;
12241229
}
12251230

@@ -1247,13 +1252,18 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
12471252
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_READ);
12481253
if (read(fd, buf, stat.st_size) != stat.st_size)
12491254
{
1255+
int save_errno = errno;
1256+
12501257
pgstat_report_wait_end();
12511258
CloseTransientFile(fd);
12521259
if (give_warnings)
1260+
{
1261+
errno = save_errno;
12531262
ereport(WARNING,
12541263
(errcode_for_file_access(),
12551264
errmsg("could not read two-phase state file \"%s\": %m",
12561265
path)));
1266+
}
12571267
pfree(buf);
12581268
return NULL;
12591269
}
@@ -1597,16 +1607,26 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
15971607
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_WRITE);
15981608
if (write(fd, content, len) != len)
15991609
{
1610+
int save_errno = errno;
1611+
16001612
pgstat_report_wait_end();
16011613
CloseTransientFile(fd);
1614+
1615+
/* if write didn't set errno, assume problem is no disk space */
1616+
errno = save_errno ? save_errno : ENOSPC;
16021617
ereport(ERROR,
16031618
(errcode_for_file_access(),
16041619
errmsg("could not write two-phase state file: %m")));
16051620
}
16061621
if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c))
16071622
{
1623+
int save_errno = errno;
1624+
16081625
pgstat_report_wait_end();
16091626
CloseTransientFile(fd);
1627+
1628+
/* if write didn't set errno, assume problem is no disk space */
1629+
errno = save_errno ? save_errno : ENOSPC;
16101630
ereport(ERROR,
16111631
(errcode_for_file_access(),
16121632
errmsg("could not write two-phase state file: %m")));
@@ -1620,7 +1640,10 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
16201640
pgstat_report_wait_start(WAIT_EVENT_TWOPHASE_FILE_SYNC);
16211641
if (pg_fsync(fd) != 0)
16221642
{
1643+
int save_errno = errno;
1644+
16231645
CloseTransientFile(fd);
1646+
errno = save_errno;
16241647
ereport(ERROR,
16251648
(errcode_for_file_access(),
16261649
errmsg("could not fsync two-phase state file: %m")));

src/backend/access/transam/xlog.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3243,7 +3243,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
32433243
pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
32443244
if (pg_fsync(fd) != 0)
32453245
{
3246+
int save_errno = errno;
3247+
32463248
close(fd);
3249+
errno = save_errno;
32473250
ereport(ERROR,
32483251
(errcode_for_file_access(),
32493252
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -11590,8 +11593,10 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1159011593
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
1159111594
{
1159211595
char fname[MAXFNAMELEN];
11596+
int save_errno = errno;
1159311597

1159411598
XLogFileName(fname, curFileTLI, readSegNo);
11599+
errno = save_errno;
1159511600
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1159611601
(errcode_for_file_access(),
1159711602
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -11603,9 +11608,11 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1160311608
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1160411609
{
1160511610
char fname[MAXFNAMELEN];
11611+
int save_errno = errno;
1160611612

1160711613
pgstat_report_wait_end();
1160811614
XLogFileName(fname, curFileTLI, readSegNo);
11615+
errno = save_errno;
1160911616
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
1161011617
(errcode_for_file_access(),
1161111618
errmsg("could not read from log segment %s, offset %u: %m",

src/backend/access/transam/xlogutils.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,9 +716,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
716716
if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
717717
{
718718
char path[MAXPGPATH];
719+
int save_errno = errno;
719720

720721
XLogFilePath(path, tli, sendSegNo);
721722

723+
errno = save_errno;
722724
ereport(ERROR,
723725
(errcode_for_file_access(),
724726
errmsg("could not seek in log segment %s to offset %u: %m",
@@ -739,9 +741,11 @@ XLogRead(char *buf, TimeLineID tli, XLogRecPtr startptr, Size count)
739741
if (readbytes <= 0)
740742
{
741743
char path[MAXPGPATH];
744+
int save_errno = errno;
742745

743746
XLogFilePath(path, tli, sendSegNo);
744747

748+
errno = save_errno;
745749
ereport(ERROR,
746750
(errcode_for_file_access(),
747751
errmsg("could not read from log segment %s, offset %u, length %lu: %m",

src/backend/replication/basebackup.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,13 +463,16 @@ perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
463463
fp = AllocateFile(pathbuf, "rb");
464464
if (fp == NULL)
465465
{
466+
int save_errno = errno;
467+
466468
/*
467469
* Most likely reason for this is that the file was already
468470
* removed by a checkpoint, so check for that to get a better
469471
* error message.
470472
*/
471473
CheckXLogRemoved(segno, tli);
472474

475+
errno = save_errno;
473476
ereport(ERROR,
474477
(errcode_for_file_access(),
475478
errmsg("could not open file \"%s\": %m", pathbuf)));

src/backend/replication/logical/origin.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,12 @@ CheckPointReplicationOrigin(void)
579579
/* write magic */
580580
if ((write(tmpfd, &magic, sizeof(magic))) != sizeof(magic))
581581
{
582+
int save_errno = errno;
583+
582584
CloseTransientFile(tmpfd);
585+
586+
/* if write didn't set errno, assume problem is no disk space */
587+
errno = save_errno ? save_errno : ENOSPC;
583588
ereport(PANIC,
584589
(errcode_for_file_access(),
585590
errmsg("could not write to file \"%s\": %m",
@@ -618,7 +623,12 @@ CheckPointReplicationOrigin(void)
618623
if ((write(tmpfd, &disk_state, sizeof(disk_state))) !=
619624
sizeof(disk_state))
620625
{
626+
int save_errno = errno;
627+
621628
CloseTransientFile(tmpfd);
629+
630+
/* if write didn't set errno, assume problem is no disk space */
631+
errno = save_errno ? save_errno : ENOSPC;
622632
ereport(PANIC,
623633
(errcode_for_file_access(),
624634
errmsg("could not write to file \"%s\": %m",
@@ -634,7 +644,12 @@ CheckPointReplicationOrigin(void)
634644
FIN_CRC32C(crc);
635645
if ((write(tmpfd, &crc, sizeof(crc))) != sizeof(crc))
636646
{
647+
int save_errno = errno;
648+
637649
CloseTransientFile(tmpfd);
650+
651+
/* if write didn't set errno, assume problem is no disk space */
652+
errno = save_errno ? save_errno : ENOSPC;
638653
ereport(PANIC,
639654
(errcode_for_file_access(),
640655
errmsg("could not write to file \"%s\": %m",

src/backend/replication/logical/reorderbuffer.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2309,7 +2309,9 @@ ReorderBufferSerializeChange(ReorderBuffer *rb, ReorderBufferTXN *txn,
23092309
int save_errno = errno;
23102310

23112311
CloseTransientFile(fd);
2312-
errno = save_errno;
2312+
2313+
/* if write didn't set errno, assume problem is no disk space */
2314+
errno = save_errno ? save_errno : ENOSPC;
23132315
ereport(ERROR,
23142316
(errcode_for_file_access(),
23152317
errmsg("could not write to data file for XID %u: %m",

src/backend/replication/logical/snapbuild.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1606,7 +1606,12 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
16061606
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_WRITE);
16071607
if ((write(fd, ondisk, needed_length)) != needed_length)
16081608
{
1609+
int save_errno = errno;
1610+
16091611
CloseTransientFile(fd);
1612+
1613+
/* if write didn't set errno, assume problem is no disk space */
1614+
errno = save_errno ? save_errno : ENOSPC;
16101615
ereport(ERROR,
16111616
(errcode_for_file_access(),
16121617
errmsg("could not write to file \"%s\": %m", tmppath)));
@@ -1624,7 +1629,10 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
16241629
pgstat_report_wait_start(WAIT_EVENT_SNAPBUILD_SYNC);
16251630
if (pg_fsync(fd) != 0)
16261631
{
1632+
int save_errno = errno;
1633+
16271634
CloseTransientFile(fd);
1635+
errno = save_errno;
16281636
ereport(ERROR,
16291637
(errcode_for_file_access(),
16301638
errmsg("could not fsync file \"%s\": %m", tmppath)));
@@ -1709,7 +1717,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17091717
pgstat_report_wait_end();
17101718
if (readBytes != SnapBuildOnDiskConstantSize)
17111719
{
1720+
int save_errno = errno;
1721+
17121722
CloseTransientFile(fd);
1723+
errno = save_errno;
17131724
ereport(ERROR,
17141725
(errcode_for_file_access(),
17151726
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1737,7 +1748,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17371748
pgstat_report_wait_end();
17381749
if (readBytes != sizeof(SnapBuild))
17391750
{
1751+
int save_errno = errno;
1752+
17401753
CloseTransientFile(fd);
1754+
errno = save_errno;
17411755
ereport(ERROR,
17421756
(errcode_for_file_access(),
17431757
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1754,7 +1768,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17541768
pgstat_report_wait_end();
17551769
if (readBytes != sz)
17561770
{
1771+
int save_errno = errno;
1772+
17571773
CloseTransientFile(fd);
1774+
errno = save_errno;
17581775
ereport(ERROR,
17591776
(errcode_for_file_access(),
17601777
errmsg("could not read file \"%s\", read %d of %d: %m",
@@ -1770,7 +1787,10 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
17701787
pgstat_report_wait_end();
17711788
if (readBytes != sz)
17721789
{
1790+
int save_errno = errno;
1791+
17731792
CloseTransientFile(fd);
1793+
errno = save_errno;
17741794
ereport(ERROR,
17751795
(errcode_for_file_access(),
17761796
errmsg("could not read file \"%s\", read %d of %d: %m",

src/backend/replication/slot.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1268,7 +1268,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
12681268

12691269
pgstat_report_wait_end();
12701270
CloseTransientFile(fd);
1271-
errno = save_errno;
1271+
1272+
/* if write didn't set errno, assume problem is no disk space */
1273+
errno = save_errno ? save_errno : ENOSPC;
12721274
ereport(elevel,
12731275
(errcode_for_file_access(),
12741276
errmsg("could not write to file \"%s\": %m",
@@ -1372,7 +1374,10 @@ RestoreSlotFromDisk(const char *name)
13721374
pgstat_report_wait_start(WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC);
13731375
if (pg_fsync(fd) != 0)
13741376
{
1377+
int save_errno = errno;
1378+
13751379
CloseTransientFile(fd);
1380+
errno = save_errno;
13761381
ereport(PANIC,
13771382
(errcode_for_file_access(),
13781383
errmsg("could not fsync file \"%s\": %m",

src/bin/pg_basebackup/walmethods.c

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,11 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
127127

128128
pg_free(zerobuf);
129129
close(fd);
130-
errno = save_errno;
130+
131+
/*
132+
* If write didn't set errno, assume problem is no disk space.
133+
*/
134+
errno = save_errno ? save_errno : ENOSPC;
131135
return NULL;
132136
}
133137
}
@@ -441,7 +445,14 @@ tar_write_compressed_data(void *buf, size_t count, bool flush)
441445
size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
442446

443447
if (write(tar_data->fd, tar_data->zlibOut, len) != len)
448+
{
449+
/*
450+
* If write didn't set errno, assume problem is no disk space.
451+
*/
452+
if (errno == 0)
453+
errno = ENOSPC;
444454
return false;
455+
}
445456

446457
tar_data->zp->next_out = tar_data->zlibOut;
447458
tar_data->zp->avail_out = ZLIB_OUT_SIZE;
@@ -621,7 +632,8 @@ tar_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
621632
save_errno = errno;
622633
pg_free(tar_data->currentfile);
623634
tar_data->currentfile = NULL;
624-
errno = save_errno;
635+
/* if write didn't set errno, assume problem is no disk space */
636+
errno = save_errno ? save_errno : ENOSPC;
625637
return NULL;
626638
}
627639
}
@@ -816,7 +828,12 @@ tar_close(Walfile f, WalCloseMethod method)
816828
if (!tar_data->compression)
817829
{
818830
if (write(tar_data->fd, tf->header, 512) != 512)
831+
{
832+
/* if write didn't set errno, assume problem is no disk space */
833+
if (errno == 0)
834+
errno = ENOSPC;
819835
return -1;
836+
}
820837
}
821838
#ifdef HAVE_LIBZ
822839
else
@@ -882,7 +899,12 @@ tar_finish(void)
882899
if (!tar_data->compression)
883900
{
884901
if (write(tar_data->fd, zerobuf, sizeof(zerobuf)) != sizeof(zerobuf))
902+
{
903+
/* if write didn't set errno, assume problem is no disk space */
904+
if (errno == 0)
905+
errno = ENOSPC;
885906
return false;
907+
}
886908
}
887909
#ifdef HAVE_LIBZ
888910
else
@@ -909,7 +931,15 @@ tar_finish(void)
909931
size_t len = ZLIB_OUT_SIZE - tar_data->zp->avail_out;
910932

911933
if (write(tar_data->fd, tar_data->zlibOut, len) != len)
934+
{
935+
/*
936+
* If write didn't set errno, assume problem is no disk
937+
* space.
938+
*/
939+
if (errno == 0)
940+
errno = ENOSPC;
912941
return false;
942+
}
913943
}
914944
if (r == Z_STREAM_END)
915945
break;

0 commit comments

Comments
 (0)