|
72 | 72 |
|
73 | 73 | #include "postgres.h"
|
74 | 74 |
|
| 75 | +#include <dirent.h> |
75 | 76 | #include <sys/file.h>
|
76 | 77 | #include <sys/param.h>
|
77 | 78 | #include <sys/stat.h>
|
| 79 | +#include <sys/types.h> |
78 | 80 | #ifndef WIN32
|
79 | 81 | #include <sys/mman.h>
|
80 | 82 | #endif
|
@@ -158,6 +160,9 @@ int max_safe_fds = FD_MINFREE; /* default if not changed */
|
158 | 160 | /* Whether it is safe to continue running after fsync() fails. */
|
159 | 161 | bool data_sync_retry = false;
|
160 | 162 |
|
| 163 | +/* How SyncDataDirectory() should do its job. */ |
| 164 | +int recovery_init_sync_method = RECOVERY_INIT_SYNC_METHOD_FSYNC; |
| 165 | + |
161 | 166 | /* Debugging.... */
|
162 | 167 |
|
163 | 168 | #ifdef FDDEBUG
|
@@ -3265,9 +3270,31 @@ looks_like_temp_rel_name(const char *name)
|
3265 | 3270 | return true;
|
3266 | 3271 | }
|
3267 | 3272 |
|
#ifdef HAVE_SYNCFS
/*
 * do_syncfs -- request a syncfs() on the filesystem that holds "path".
 *
 * "path" is opened read-only purely to obtain a descriptor that can be
 * handed to syncfs().  Failures to open, sync or close are each reported
 * at LOG level; nothing is returned to the caller.
 */
static void
do_syncfs(const char *path)
{
	int			fd;

	fd = OpenTransientFile(path, O_RDONLY);
	if (fd < 0)
	{
		/* Quote the file name, matching the other messages in this function. */
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", path)));
		return;
	}

	if (syncfs(fd) < 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not sync filesystem for \"%s\": %m", path)));

	/* Always release the descriptor; log rather than ignore a close failure. */
	if (CloseTransientFile(fd) != 0)
		ereport(LOG,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", path)));
}
#endif							/* HAVE_SYNCFS */
3268 | 3294 |
|
3269 | 3295 | /*
|
3270 |
| - * Issue fsync recursively on PGDATA and all its contents. |
| 3296 | + * Issue fsync recursively on PGDATA and all its contents, or issue syncfs for |
| 3297 | + * all potential filesystems, depending on the recovery_init_sync_method setting. |
3271 | 3298 | *
|
3272 | 3299 | * We fsync regular files and directories wherever they are, but we
|
3273 | 3300 | * follow symlinks only for pg_wal and immediately under pg_tblspc.
|
@@ -3319,6 +3346,42 @@ SyncDataDirectory(void)
|
3319 | 3346 | xlog_is_symlink = true;
|
3320 | 3347 | #endif
|
3321 | 3348 |
|
| 3349 | +#ifdef HAVE_SYNCFS |
| 3350 | + if (recovery_init_sync_method == RECOVERY_INIT_SYNC_METHOD_SYNCFS) |
| 3351 | + { |
| 3352 | + DIR *dir; |
| 3353 | + struct dirent *de; |
| 3354 | + |
| 3355 | + /* |
| 3356 | + * On Linux, we don't have to open every single file one by one. We |
| 3357 | + * can use syncfs() to sync whole filesystems. We only expect |
| 3358 | + * filesystem boundaries to exist where we tolerate symlinks, namely |
| 3359 | + * pg_wal and the tablespaces, so we call syncfs() for each of those |
| 3360 | + * directories. |
| 3361 | + */ |
| 3362 | + |
| 3363 | + /* Sync the top level pgdata directory. */ |
| 3364 | + do_syncfs("."); |
| 3365 | + /* If any tablespaces are configured, sync each of those. */ |
| 3366 | + dir = AllocateDir("pg_tblspc"); |
| 3367 | + while ((de = ReadDirExtended(dir, "pg_tblspc", LOG))) |
| 3368 | + { |
| 3369 | + char path[MAXPGPATH]; |
| 3370 | + |
| 3371 | + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) |
| 3372 | + continue; |
| 3373 | + |
| 3374 | + snprintf(path, MAXPGPATH, "pg_tblspc/%s", de->d_name); |
| 3375 | + do_syncfs(path); |
| 3376 | + } |
| 3377 | + FreeDir(dir); |
| 3378 | + /* If pg_wal is a symlink, process that too. */ |
| 3379 | + if (xlog_is_symlink) |
| 3380 | + do_syncfs("pg_wal"); |
| 3381 | + return; |
| 3382 | + } |
| 3383 | +#endif /* HAVE_SYNCFS */ |
| 3384 | + |
3322 | 3385 | /*
|
3323 | 3386 | * If possible, hint to the kernel that we're soon going to fsync the data
|
3324 | 3387 | * directory and its contents. Errors in this step are even less
|
|
0 commit comments