Skip to content

Commit 2dbe890

Browse files
committed
Support direct I/O on macOS.
Macs don't understand O_DIRECT, but they can disable caching with a separate fcntl() call. Extend the file opening functions in fd.c to handle this for us if the caller passes in PG_O_DIRECT.

For now, this affects only WAL data, and even then only if you set:

    max_wal_senders=0
    wal_level=minimal

This is not expected to be very useful on its own, but later proposed patches will make greater use of direct I/O, and it'll be useful for testing if developers on Macs can see the effects.

Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKG%2BADiyyHe0cun2wfT%2BSVnFVqNYPxoO6J9zcZkVO7%2BNGig%40mail.gmail.com
1 parent f157db8 commit 2dbe890

File tree

4 files changed

+83
-19
lines changed

4 files changed

+83
-19
lines changed

src/backend/storage/file/fd.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,10 +1057,46 @@ BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
10571057
int fd;
10581058

10591059
tryAgain:
1060+
#ifdef PG_O_DIRECT_USE_F_NOCACHE
1061+
1062+
/*
1063+
* The value we defined to stand in for O_DIRECT when simulating it with
1064+
* F_NOCACHE had better not collide with any of the standard flags.
1065+
*/
1066+
StaticAssertStmt((PG_O_DIRECT &
1067+
(O_APPEND |
1068+
O_CLOEXEC |
1069+
O_CREAT |
1070+
O_DSYNC |
1071+
O_RDWR |
1072+
O_RDONLY |
1073+
O_SYNC |
1074+
O_TRUNC |
1075+
O_WRONLY)) == 0,
1076+
"PG_O_DIRECT value collides with standard flag");
1077+
fd = open(fileName, fileFlags & ~PG_O_DIRECT, fileMode);
1078+
#else
10601079
fd = open(fileName, fileFlags, fileMode);
1080+
#endif
10611081

10621082
if (fd >= 0)
1083+
{
1084+
#ifdef PG_O_DIRECT_USE_F_NOCACHE
1085+
if (fileFlags & PG_O_DIRECT)
1086+
{
1087+
if (fcntl(fd, F_NOCACHE, 1) < 0)
1088+
{
1089+
int save_errno = errno;
1090+
1091+
close(fd);
1092+
errno = save_errno;
1093+
return -1;
1094+
}
1095+
}
1096+
#endif
1097+
10631098
return fd; /* success! */
1099+
}
10641100

10651101
if (errno == EMFILE || errno == ENFILE)
10661102
{

src/bin/pg_test_fsync/pg_test_fsync.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,10 @@ handle_args(int argc, char *argv[])
217217
"%u seconds per test\n",
218218
secs_per_test),
219219
secs_per_test);
220-
#if PG_O_DIRECT != 0
220+
#if defined(O_DIRECT)
221221
printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n"));
222+
#elif defined(F_NOCACHE)
223+
printf(_("F_NOCACHE supported on this platform for open_datasync and open_sync.\n"));
222224
#else
223225
printf(_("Direct I/O is not supported on this platform.\n"));
224226
#endif
@@ -258,6 +260,31 @@ test_open(void)
258260
close(tmpfile);
259261
}
260262

263+
/*
 * open_direct
 *
 * Open "path" like open(2), additionally requesting that the kernel cache be
 * bypassed when the platform gives us a way to ask for that:
 *
 *	- if O_DIRECT is defined, it is added to the flags passed to open();
 *	- otherwise, if F_NOCACHE is defined, the file is opened normally and
 *	  caching is then switched off with fcntl(F_NOCACHE);
 *	- otherwise the file is simply opened with the caller's flags.
 *
 * Returns the new file descriptor, or -1 with errno set on failure.  If the
 * fcntl() call fails, the descriptor is closed again and errno reports the
 * fcntl() error rather than anything close() might have set.
 */
static int
open_direct(const char *path, int flags, mode_t mode)
{
#if defined(O_DIRECT)
	/* A real O_DIRECT flag exists, so open() can do all the work. */
	return open(path, flags | O_DIRECT, mode);
#elif defined(F_NOCACHE)
	int			handle = open(path, flags, mode);

	/* Nothing to adjust if the open itself failed. */
	if (handle < 0)
		return handle;

	if (fcntl(handle, F_NOCACHE, 1) < 0)
	{
		/* Remember why fcntl() failed; close() may overwrite errno. */
		int			fcntl_errno = errno;

		close(handle);
		errno = fcntl_errno;
		return -1;
	}

	return handle;
#else
	/* No way to request direct I/O here; fall back to a plain open(). */
	return open(path, flags, mode);
#endif
}
}
287+
261288
static void
262289
test_sync(int writes_per_op)
263290
{
@@ -279,7 +306,7 @@ test_sync(int writes_per_op)
279306
fflush(stdout);
280307

281308
#ifdef OPEN_DATASYNC_FLAG
282-
if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT | PG_BINARY, 0)) == -1)
309+
if ((tmpfile = open_direct(filename, O_RDWR | O_DSYNC | PG_BINARY, 0)) == -1)
283310
{
284311
printf(NA_FORMAT, _("n/a*"));
285312
fs_warning = true;
@@ -386,7 +413,7 @@ test_sync(int writes_per_op)
386413
fflush(stdout);
387414

388415
#ifdef OPEN_SYNC_FLAG
389-
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
416+
if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
390417
{
391418
printf(NA_FORMAT, _("n/a*"));
392419
fs_warning = true;
@@ -454,7 +481,7 @@ test_open_sync(const char *msg, int writes_size)
454481
fflush(stdout);
455482

456483
#ifdef OPEN_SYNC_FLAG
457-
if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT | PG_BINARY, 0)) == -1)
484+
if ((tmpfile = open_direct(filename, O_RDWR | OPEN_SYNC_FLAG | PG_BINARY, 0)) == -1)
458485
printf(NA_FORMAT, _("n/a*"));
459486
else
460487
{

src/include/access/xlogdefs.h

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,21 +64,6 @@ typedef uint32 TimeLineID;
6464
*/
6565
typedef uint16 RepOriginId;
6666

67-
/*
68-
* Because O_DIRECT bypasses the kernel buffers, and because we never
69-
* read those buffers except during crash recovery or if wal_level != minimal,
70-
* it is a win to use it in all cases where we sync on each write(). We could
71-
* allow O_DIRECT with fsync(), but it is unclear if fsync() could process
72-
* writes not buffered in the kernel. Also, O_DIRECT is never enough to force
73-
* data to the drives, it merely tries to bypass the kernel cache, so we still
74-
* need O_SYNC/O_DSYNC.
75-
*/
76-
#ifdef O_DIRECT
77-
#define PG_O_DIRECT O_DIRECT
78-
#else
79-
#define PG_O_DIRECT 0
80-
#endif
81-
8267
/*
8368
* This chunk of hackery attempts to determine which file sync methods
8469
* are available on the current platform, and to choose an appropriate

src/include/storage/fd.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,22 @@ extern int max_safe_fds;
7979
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES)
8080
#endif
8181

82+
/*
83+
* O_DIRECT is not standard, but almost every Unix has it. We translate it
84+
* to the appropriate Windows flag in src/port/open.c. We simulate it with
85+
* fcntl(F_NOCACHE) on macOS inside fd.c's open() wrapper. We use the name
86+
* PG_O_DIRECT rather than defining O_DIRECT ourselves in that case, since
87+
* inventing our own definition of a system flag is probably not a good idea on a Unix.
88+
*/
89+
#if defined(O_DIRECT)
90+
#define PG_O_DIRECT O_DIRECT
91+
#elif defined(F_NOCACHE)
92+
#define PG_O_DIRECT 0x80000000
93+
#define PG_O_DIRECT_USE_F_NOCACHE
94+
#else
95+
#define PG_O_DIRECT 0
96+
#endif
97+
8298
/*
8399
* prototypes for functions in fd.c
84100
*/

0 commit comments

Comments
 (0)