Skip to content

Commit 1b02be2

Browse files
committed
Fsync directory after creating or unlinking file.
If a file was created or deleted just before a power loss, it's possible that the file system will miss that. To prevent it, call fsync() where creating/unlinking a file is critical. Author: Michael Paquier Reviewed-by: Ashutosh Bapat, Takayuki Tsunakawa, me
1 parent 1f171a1 commit 1b02be2

File tree

6 files changed

+78
-13
lines changed

6 files changed

+78
-13
lines changed

src/backend/access/transam/clog.c

+14
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,13 @@ ShutdownCLOG(void)
577577
/* Flush dirty CLOG pages to disk */
578578
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
579579
SimpleLruFlush(ClogCtl, false);
580+
581+
/*
582+
* fsync pg_xact to ensure that any files flushed previously are durably
583+
* on disk.
584+
*/
585+
fsync_fname("pg_xact", true);
586+
580587
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
581588
}
582589

@@ -589,6 +596,13 @@ CheckPointCLOG(void)
589596
/* Flush dirty CLOG pages to disk */
590597
TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
591598
SimpleLruFlush(ClogCtl, true);
599+
600+
/*
601+
* fsync pg_xact to ensure that any files flushed previously are durably
602+
* on disk.
603+
*/
604+
fsync_fname("pg_xact", true);
605+
592606
TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
593607
}
594608

src/backend/access/transam/commit_ts.c

+12
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,12 @@ ShutdownCommitTs(void)
746746
{
747747
/* Flush dirty CommitTs pages to disk */
748748
SimpleLruFlush(CommitTsCtl, false);
749+
750+
/*
751+
* fsync pg_commit_ts to ensure that any files flushed previously are durably
752+
* on disk.
753+
*/
754+
fsync_fname("pg_commit_ts", true);
749755
}
750756

751757
/*
@@ -756,6 +762,12 @@ CheckPointCommitTs(void)
756762
{
757763
/* Flush dirty CommitTs pages to disk */
758764
SimpleLruFlush(CommitTsCtl, true);
765+
766+
/*
767+
* fsync pg_commit_ts to ensure that any files flushed previously are durably
768+
* on disk.
769+
*/
770+
fsync_fname("pg_commit_ts", true);
759771
}
760772

761773
/*

src/backend/access/transam/twophase.c

+8
Original file line numberDiff line numberDiff line change
@@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
16501650
}
16511651
LWLockRelease(TwoPhaseStateLock);
16521652

1653+
/*
1654+
* Flush unconditionally the parent directory to make any information
1655+
* durable on disk. Two-phase files could have been removed and those
1656+
* removals need to be made persistent as well as any files newly created
1657+
* since the last checkpoint.
1658+
*/
1659+
fsync_fname(TWOPHASE_DIR, true);
1660+
16531661
TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
16541662

16551663
if (log_checkpoints && serialized_xacts > 0)

src/backend/access/transam/xlog.c

+6-13
Original file line numberDiff line numberDiff line change
@@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
34753475
if (!find_free)
34763476
{
34773477
/* Force installation: get rid of any pre-existing segment file */
3478-
unlink(path);
3478+
durable_unlink(path, DEBUG1);
34793479
}
34803480
else
34813481
{
@@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
40264026
path)));
40274027
return;
40284028
}
4029-
rc = unlink(newpath);
4029+
rc = durable_unlink(newpath, LOG);
40304030
#else
4031-
rc = unlink(path);
4031+
rc = durable_unlink(path, LOG);
40324032
#endif
40334033
if (rc != 0)
40344034
{
4035-
ereport(LOG,
4036-
(errcode_for_file_access(),
4037-
errmsg("could not remove old transaction log file \"%s\": %m",
4038-
path)));
4035+
/* Message already logged by durable_unlink() */
40394036
return;
40404037
}
40414038
CheckpointStats.ckpt_segs_removed++;
@@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
1077110768
(errcode_for_file_access(),
1077210769
errmsg("could not read file \"%s\": %m",
1077310770
BACKUP_LABEL_FILE)));
10774-
if (unlink(BACKUP_LABEL_FILE) != 0)
10775-
ereport(ERROR,
10776-
(errcode_for_file_access(),
10777-
errmsg("could not remove file \"%s\": %m",
10778-
BACKUP_LABEL_FILE)));
10771+
durable_unlink(BACKUP_LABEL_FILE, ERROR);
1077910772

1078010773
/*
1078110774
* Remove tablespace_map file if present, it is created only if there
1078210775
* are tablespaces.
1078310776
*/
10784-
unlink(TABLESPACE_MAP);
10777+
durable_unlink(TABLESPACE_MAP, DEBUG1);
1078510778
}
1078610779
PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
1078710780
}

src/backend/storage/file/fd.c

+37
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel)
657657
return 0;
658658
}
659659

660+
/*
661+
* durable_unlink -- remove a file in a durable manner
662+
*
663+
* This routine ensures that, after returning, the effect of removing the file
664+
* persists in case of a crash. A crash while this routine is running will
665+
* not leave the system in a mixed state.
666+
*
667+
* It does so by using fsync on the parent directory of the file after the
668+
* actual removal is done.
669+
*
670+
* Errors are logged with the severity specified by the caller in elevel.
* NOTE(review): presumably ereport() does not return when elevel is ERROR
* or higher, so the -1 paths only apply to lower levels -- confirm.
671+
*
672+
* Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not
673+
* valid upon return. -1 is also returned when unlink() succeeded but the
* fsync of the parent directory failed; in that case the file is already
* gone, only the durability of its removal is not guaranteed.
674+
*/
675+
int
676+
durable_unlink(const char *fname, int elevel)
677+
{
678+
if (unlink(fname) < 0)
679+
{
680+
ereport(elevel,
681+
(errcode_for_file_access(),
682+
errmsg("could not remove file \"%s\": %m",
683+
fname)));
684+
return -1;
685+
}
686+
687+
/*
688+
* To guarantee that the removal of the file is persistent, fsync
689+
* its parent directory. No message is emitted here on failure; the
* same elevel is handed to fsync_parent_path() for it to report with.
690+
*/
691+
if (fsync_parent_path(fname, elevel) != 0)
692+
return -1;
693+
694+
return 0;
695+
}
696+
660697
/*
661698
* durable_link_or_rename -- rename a file in a durable manner.
662699
*

src/include/storage/fd.h

+1
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ extern int pg_fdatasync(int fd);
119119
extern void pg_flush_data(int fd, off_t offset, off_t amount);
120120
extern void fsync_fname(const char *fname, bool isdir);
121121
extern int durable_rename(const char *oldfile, const char *newfile, int loglevel);
122+
extern int durable_unlink(const char *fname, int loglevel);
122123
extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
123124
extern void SyncDataDirectory(void);
124125

0 commit comments

Comments
 (0)