Skip to content

Commit 871fe49

Browse files
committed
Provide vectored variants of FileRead() and FileWrite().
FileReadV() and FileWriteV() adapt pg_preadv() and pg_pwritev() for fd.c's virtual file descriptors. The simple FileRead() and FileWrite() functions are now implemented in terms of the vectored functions, to avoid code duplication; their single-iovec calls are converted back to the corresponding simple system calls further down (commit 15c9ac3). Later work will make more interesting multi-iovec calls.

The traditional behavior of reporting a "fake" ENOSPC error is simplified: errno is now always set to ENOSPC after non-failing writes, for the benefit of callers that expect to log a meaningful "%m" if they determine that the write was short. (Perhaps we should consider getting rid of that expectation one day.)

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Discussion: https://postgr.es/m/CA+hUKGJkOiOCa+mag4BF+zHo7qo=o9CFheB8=g6uT5TUm2gkvA@mail.gmail.com
1 parent 0c6be59 commit 871fe49

File tree

2 files changed

+54
-21
lines changed
  • src
    • backend/storage/file
    • include/storage

2 files changed

+54
-21
lines changed

src/backend/storage/file/fd.c

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,18 +2110,18 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
21102110
}
21112111

21122112
int
2113-
FileRead(File file, void *buffer, size_t amount, off_t offset,
2114-
uint32 wait_event_info)
2113+
FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset,
2114+
uint32 wait_event_info)
21152115
{
21162116
int returnCode;
21172117
Vfd *vfdP;
21182118

21192119
Assert(FileIsValid(file));
21202120

2121-
DO_DB(elog(LOG, "FileRead: %d (%s) " INT64_FORMAT " %zu %p",
2121+
DO_DB(elog(LOG, "FileReadV: %d (%s) " INT64_FORMAT " %d",
21222122
file, VfdCache[file].fileName,
21232123
(int64) offset,
2124-
amount, buffer));
2124+
iovcnt));
21252125

21262126
returnCode = FileAccess(file);
21272127
if (returnCode < 0)
@@ -2131,7 +2131,7 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
21312131

21322132
retry:
21332133
pgstat_report_wait_start(wait_event_info);
2134-
returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
2134+
returnCode = pg_preadv(vfdP->fd, iov, iovcnt, offset);
21352135
pgstat_report_wait_end();
21362136

21372137
if (returnCode < 0)
@@ -2166,18 +2166,18 @@ FileRead(File file, void *buffer, size_t amount, off_t offset,
21662166
}
21672167

21682168
int
2169-
FileWrite(File file, const void *buffer, size_t amount, off_t offset,
2170-
uint32 wait_event_info)
2169+
FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset,
2170+
uint32 wait_event_info)
21712171
{
21722172
int returnCode;
21732173
Vfd *vfdP;
21742174

21752175
Assert(FileIsValid(file));
21762176

2177-
DO_DB(elog(LOG, "FileWrite: %d (%s) " INT64_FORMAT " %zu %p",
2177+
DO_DB(elog(LOG, "FileWriteV: %d (%s) " INT64_FORMAT " %d",
21782178
file, VfdCache[file].fileName,
21792179
(int64) offset,
2180-
amount, buffer));
2180+
iovcnt));
21812181

21822182
returnCode = FileAccess(file);
21832183
if (returnCode < 0)
@@ -2195,7 +2195,10 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
21952195
*/
21962196
if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
21972197
{
2198-
off_t past_write = offset + amount;
2198+
off_t past_write = offset;
2199+
2200+
for (int i = 0; i < iovcnt; ++i)
2201+
past_write += iov[i].iov_len;
21992202

22002203
if (past_write > vfdP->fileSize)
22012204
{
@@ -2211,23 +2214,27 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
22112214
}
22122215

22132216
retry:
2214-
errno = 0;
22152217
pgstat_report_wait_start(wait_event_info);
2216-
returnCode = pg_pwrite(VfdCache[file].fd, buffer, amount, offset);
2218+
returnCode = pg_pwritev(vfdP->fd, iov, iovcnt, offset);
22172219
pgstat_report_wait_end();
22182220

2219-
/* if write didn't set errno, assume problem is no disk space */
2220-
if (returnCode != amount && errno == 0)
2221-
errno = ENOSPC;
2222-
22232221
if (returnCode >= 0)
22242222
{
2223+
/*
2224+
* Some callers expect short writes to set errno, and traditionally we
2225+
* have assumed that they imply disk space shortage. We don't want to
2226+
* waste CPU cycles adding up the total size here, so we'll just set
2227+
* it for all successful writes in case such a caller determines that
2228+
* the write was short and ereports "%m".
2229+
*/
2230+
errno = ENOSPC;
2231+
22252232
/*
22262233
* Maintain fileSize and temporary_files_size if it's a temp file.
22272234
*/
22282235
if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
22292236
{
2230-
off_t past_write = offset + amount;
2237+
off_t past_write = offset + returnCode;
22312238

22322239
if (past_write > vfdP->fileSize)
22332240
{
@@ -2239,7 +2246,7 @@ FileWrite(File file, const void *buffer, size_t amount, off_t offset,
22392246
else
22402247
{
22412248
/*
2242-
* See comments in FileRead()
2249+
* See comments in FileReadV()
22432250
*/
22442251
#ifdef WIN32
22452252
DWORD error = GetLastError();

src/include/storage/fd.h

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
/*
1616
* calls:
1717
*
18-
* File {Close, Read, Write, Size, Sync}
18+
* File {Close, Read, ReadV, Write, WriteV, Size, Sync}
1919
* {Path Name Open, Allocate, Free} File
2020
*
2121
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
@@ -43,6 +43,8 @@
4343
#ifndef FD_H
4444
#define FD_H
4545

46+
#include "port/pg_iovec.h"
47+
4648
#include <dirent.h>
4749
#include <fcntl.h>
4850

@@ -105,8 +107,8 @@ extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fil
105107
extern File OpenTemporaryFile(bool interXact);
106108
extern void FileClose(File file);
107109
extern int FilePrefetch(File file, off_t offset, off_t amount, uint32 wait_event_info);
108-
extern int FileRead(File file, void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
109-
extern int FileWrite(File file, const void *buffer, size_t amount, off_t offset, uint32 wait_event_info);
110+
/* Vectored read starting at 'offset' into the 'iovcnt' buffers described by 'iov'. */
extern int FileReadV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
111+
/* Vectored write starting at 'offset' from the 'iovcnt' buffers described by 'iov'. */
extern int FileWriteV(File file, const struct iovec *iov, int iovcnt, off_t offset, uint32 wait_event_info);
110112
extern int FileSync(File file, uint32 wait_event_info);
111113
extern int FileZero(File file, off_t offset, off_t amount, uint32 wait_event_info);
112114
extern int FileFallocate(File file, off_t offset, off_t amount, uint32 wait_event_info);
@@ -189,4 +191,28 @@ extern int durable_unlink(const char *fname, int elevel);
189191
extern void SyncDataDirectory(void);
190192
extern int data_sync_elevel(int elevel);
191193

194+
/*
 * FileRead -- single-buffer convenience wrapper around FileReadV().
 *
 * Describes 'buffer'/'amount' with a one-element struct iovec and forwards
 * 'file', 'offset' and 'wait_event_info' unchanged.  The return value is
 * whatever FileReadV() returns.
 */
static inline int
195+
FileRead(File file, void *buffer, size_t amount, off_t offset,
196+
uint32 wait_event_info)
197+
{
198+
struct iovec iov = {
199+
.iov_base = buffer,
200+
.iov_len = amount
201+
};
202+
203+
return FileReadV(file, &iov, 1, offset, wait_event_info);
204+
}
205+
206+
/*
 * FileWrite -- single-buffer convenience wrapper around FileWriteV().
 *
 * Describes 'buffer'/'amount' with a one-element struct iovec and forwards
 * 'file', 'offset' and 'wait_event_info' unchanged.  The return value is
 * whatever FileWriteV() returns.
 */
static inline int
207+
FileWrite(File file, const void *buffer, size_t amount, off_t offset,
208+
uint32 wait_event_info)
209+
{
210+
struct iovec iov = {
211+
/* struct iovec's iov_base is a plain void *, so const is dropped here only to build the iovec */
.iov_base = unconstify(void *, buffer),
212+
.iov_len = amount
213+
};
214+
215+
return FileWriteV(file, &iov, 1, offset, wait_event_info);
216+
}
217+
192218
#endif /* FD_H */

0 commit comments

Comments
 (0)