Skip to content

Commit 4abf615

Browse files
committed
Backpatch critical performance fixes to pgarch.c
This backpatches commits beb4e9b and 1fb17b1 (originally appearing in REL_15_STABLE) to REL_14_STABLE. Performance of the WAL archiver can become pretty critical at times, and reports exist of users getting in serious trouble (hours of downtime, loss of replicas) because of lack of this optimization. We'd like to backpatch these to REL_13_STABLE too, but because of the very invasive changes made by commit d75288f in the 14 timeframe, we deem it too risky :-( Original commit messages appear below. Discussion: https://postgr.es/m/202411131605.m66syq5i5ucl@alvherre.pgsql commit beb4e9b Author: Robert Haas <rhaas@postgresql.org> AuthorDate: Thu Nov 11 15:02:53 2021 -0500 Improve performance of pgarch_readyXlog() with many status files. Presently, the archive_status directory was scanned for each file to archive. When there are many status files, say because archive_command has been failing for a long time, these directory scans can get very slow. With this change, the archiver remembers several files to archive during each directory scan, speeding things up. To ensure timeline history files are archived as quickly as possible, XLogArchiveNotify() forces the archiver to do a new directory scan as soon as the .ready file for one is created. Nathan Bossart, per a long discussion involving many people. It is not clear to me exactly who out of all those people reviewed this particular patch. Discussion: http://postgr.es/m/CA+TgmobhAbs2yabTuTRkJTq_kkC80-+jw=pfpypdOJ7+gAbQbw@mail.gmail.com Discussion: http://postgr.es/m/620F3CE1-0255-4D66-9D87-0EADE866985A@amazon.com commit 1fb17b1 Author: Tom Lane <tgl@sss.pgh.pa.us> AuthorDate: Wed Dec 29 17:02:50 2021 -0500 Fix issues in pgarch's new directory-scanning logic. The arch_filenames[] array elements were one byte too small, so that a maximum-length filename would get corrupted if another entry were made after it. (Noted by Thomas Munro, fix by Nathan Bossart.) 
Move these arrays into a palloc'd struct, so that we aren't wasting a few kilobytes of static data in each non-archiver process. Add a binaryheap_reset() call to make it plain that we start the directory scan with an empty heap. I don't think there's any live bug of that sort, but it seems fragile, and this is very cheap insurance. Cleanup for commit beb4e9b, so no back-patch needed. Discussion: https://postgr.es/m/CA+hUKGLHAjHuKuwtzsW7uMJF4BVPcQRL-UMZG_HM-g0y7yLkUg@mail.gmail.com
1 parent 315264d commit 4abf615

File tree

3 files changed

+197
-26
lines changed

3 files changed

+197
-26
lines changed

src/backend/access/transam/xlogarchive.c

+14
Original file line number | Diff line number | Diff line change
@@ -489,6 +489,20 @@ XLogArchiveNotify(const char *xlog)
489489
return;
490490
}
491491

492+
/*
493+
* Timeline history files are given the highest archival priority to lower
494+
* the chance that a promoted standby will choose a timeline that is
495+
* already in use. However, the archiver ordinarily tries to gather
496+
* multiple files to archive from each scan of the archive_status
497+
* directory, which means that newly created timeline history files could
498+
* be left unarchived for a while. To ensure that the archiver picks up
499+
* timeline history files as soon as possible, we force the archiver to
500+
* scan the archive_status directory the next time it looks for a file to
501+
* archive.
502+
*/
503+
if (IsTLHistoryFileName(xlog))
504+
PgArchForceDirScan();
505+
492506
/* Notify archiver that it's got something to do */
493507
if (IsUnderPostmaster)
494508
PgArchWakeup();

src/backend/postmaster/pgarch.c

+182-26
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535

3636
#include "access/xlog.h"
3737
#include "access/xlog_internal.h"
38+
#include "lib/binaryheap.h"
3839
#include "libpq/pqsignal.h"
3940
#include "miscadmin.h"
4041
#include "pgstat.h"
@@ -47,6 +48,7 @@
4748
#include "storage/proc.h"
4849
#include "storage/procsignal.h"
4950
#include "storage/shmem.h"
51+
#include "storage/spin.h"
5052
#include "utils/guc.h"
5153
#include "utils/ps_status.h"
5254

@@ -72,10 +74,22 @@
7274
*/
7375
#define NUM_ORPHAN_CLEANUP_RETRIES 3
7476

77+
/*
78+
* Maximum number of .ready files to gather per directory scan.
79+
*/
80+
#define NUM_FILES_PER_DIRECTORY_SCAN 64
81+
7582
/* Shared memory area for archiver process */
7683
typedef struct PgArchData
7784
{
7885
int pgprocno; /* pgprocno of archiver process */
86+
87+
/*
88+
* Forces a directory scan in pgarch_readyXlog(). Protected by arch_lck.
89+
*/
90+
bool force_dir_scan;
91+
92+
slock_t arch_lck;
7993
} PgArchData;
8094

8195

@@ -86,6 +100,31 @@ typedef struct PgArchData
86100
static time_t last_sigterm_time = 0;
87101
static PgArchData *PgArch = NULL;
88102

103+
/*
104+
* Stuff for tracking multiple files to archive from each scan of
105+
* archive_status. Minimizing the number of directory scans when there are
106+
* many files to archive can significantly improve archival rate.
107+
*
108+
* arch_heap is a max-heap that is used during the directory scan to track
109+
* the highest-priority files to archive. After the directory scan
110+
* completes, the file names are stored in ascending order of priority in
111+
* arch_files. pgarch_readyXlog() returns files from arch_files until it
112+
* is empty, at which point another directory scan must be performed.
113+
*
114+
* We only need this data in the archiver process, so make it a palloc'd
115+
* struct rather than a bunch of static arrays.
116+
*/
117+
struct arch_files_state
118+
{
119+
binaryheap *arch_heap;
120+
int arch_files_size; /* number of live entries in arch_files[] */
121+
char *arch_files[NUM_FILES_PER_DIRECTORY_SCAN];
122+
/* buffers underlying heap, and later arch_files[], entries: */
123+
char arch_filenames[NUM_FILES_PER_DIRECTORY_SCAN][MAX_XFN_CHARS + 1];
124+
};
125+
126+
static struct arch_files_state *arch_files = NULL;
127+
89128
/*
90129
* Flags set by interrupt handlers for later service in the main loop.
91130
*/
@@ -103,6 +142,7 @@ static bool pgarch_readyXlog(char *xlog);
103142
static void pgarch_archiveDone(char *xlog);
104143
static void pgarch_die(int code, Datum arg);
105144
static void HandlePgArchInterrupts(void);
145+
static int ready_file_comparator(Datum a, Datum b, void *arg);
106146

107147
/* Report shared memory space needed by PgArchShmemInit */
108148
Size
@@ -129,6 +169,7 @@ PgArchShmemInit(void)
129169
/* First time through, so initialize */
130170
MemSet(PgArch, 0, PgArchShmemSize());
131171
PgArch->pgprocno = INVALID_PGPROCNO;
172+
SpinLockInit(&PgArch->arch_lck);
132173
}
133174
}
134175

@@ -198,6 +239,14 @@ PgArchiverMain(void)
198239
*/
199240
PgArch->pgprocno = MyProc->pgprocno;
200241

242+
/* Create workspace for pgarch_readyXlog() */
243+
arch_files = palloc(sizeof(struct arch_files_state));
244+
arch_files->arch_files_size = 0;
245+
246+
/* Initialize our max-heap for prioritizing files to archive. */
247+
arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
248+
ready_file_comparator, NULL);
249+
201250
pgarch_MainLoop();
202251

203252
proc_exit(0);
@@ -325,6 +374,9 @@ pgarch_ArchiverCopyLoop(void)
325374
{
326375
char xlog[MAX_XFN_CHARS + 1];
327376

377+
/* force directory scan in the first call to pgarch_readyXlog() */
378+
arch_files->arch_files_size = 0;
379+
328380
/*
329381
* loop through all xlogs with archive_status of .ready and archive
330382
* them...mostly we expect this to be a single file, though it is possible
@@ -600,26 +652,65 @@ pgarch_archiveXlog(char *xlog)
600652
static bool
601653
pgarch_readyXlog(char *xlog)
602654
{
603-
/*
604-
* open xlog status directory and read through list of xlogs that have the
605-
* .ready suffix, looking for earliest file. It is possible to optimise
606-
* this code, though only a single file is expected on the vast majority
607-
* of calls, so....
608-
*/
609655
char XLogArchiveStatusDir[MAXPGPATH];
610656
DIR *rldir;
611657
struct dirent *rlde;
612-
bool found = false;
613-
bool historyFound = false;
658+
bool force_dir_scan;
659+
660+
/*
661+
* If a directory scan was requested, clear the stored file names and
662+
* proceed.
663+
*/
664+
SpinLockAcquire(&PgArch->arch_lck);
665+
force_dir_scan = PgArch->force_dir_scan;
666+
PgArch->force_dir_scan = false;
667+
SpinLockRelease(&PgArch->arch_lck);
668+
669+
if (force_dir_scan)
670+
arch_files->arch_files_size = 0;
671+
672+
/*
673+
* If we still have stored file names from the previous directory scan,
674+
* try to return one of those. We check to make sure the status file is
675+
* still present, as the archive_command for a previous file may have
676+
* already marked it done.
677+
*/
678+
while (arch_files->arch_files_size > 0)
679+
{
680+
struct stat st;
681+
char status_file[MAXPGPATH];
682+
char *arch_file;
683+
684+
arch_files->arch_files_size--;
685+
arch_file = arch_files->arch_files[arch_files->arch_files_size];
686+
StatusFilePath(status_file, arch_file, ".ready");
687+
688+
if (stat(status_file, &st) == 0)
689+
{
690+
strcpy(xlog, arch_file);
691+
return true;
692+
}
693+
else if (errno != ENOENT)
694+
ereport(ERROR,
695+
(errcode_for_file_access(),
696+
errmsg("could not stat file \"%s\": %m", status_file)));
697+
}
614698

699+
/* arch_heap is probably empty, but let's make sure */
700+
binaryheap_reset(arch_files->arch_heap);
701+
702+
/*
703+
* Open the archive status directory and read through the list of files
704+
* with the .ready suffix, looking for the earliest files.
705+
*/
615706
snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status");
616707
rldir = AllocateDir(XLogArchiveStatusDir);
617708

618709
while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL)
619710
{
620711
int basenamelen = (int) strlen(rlde->d_name) - 6;
621712
char basename[MAX_XFN_CHARS + 1];
622-
bool ishistory;
713+
char *arch_file;
623714

624715
/* Ignore entries with unexpected number of characters */
625716
if (basenamelen < MIN_XFN_CHARS ||
@@ -638,32 +729,97 @@ pgarch_readyXlog(char *xlog)
638729
memcpy(basename, rlde->d_name, basenamelen);
639730
basename[basenamelen] = '\0';
640731

641-
/* Is this a history file? */
642-
ishistory = IsTLHistoryFileName(basename);
643-
644732
/*
645-
* Consume the file to archive. History files have the highest
646-
* priority. If this is the first file or the first history file
647-
* ever, copy it. In the presence of a history file already chosen as
648-
* target, ignore all other files except history files which have been
649-
* generated for an older timeline than what is already chosen as
650-
* target to archive.
733+
* Store the file in our max-heap if it has a high enough priority.
651734
*/
652-
if (!found || (ishistory && !historyFound))
735+
if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
653736
{
654-
strcpy(xlog, basename);
655-
found = true;
656-
historyFound = ishistory;
737+
/* If the heap isn't full yet, quickly add it. */
738+
arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size];
739+
strcpy(arch_file, basename);
740+
binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file));
741+
742+
/* If we just filled the heap, make it a valid one. */
743+
if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN)
744+
binaryheap_build(arch_files->arch_heap);
657745
}
658-
else if (ishistory || !historyFound)
746+
else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap),
747+
CStringGetDatum(basename), NULL) > 0)
659748
{
660-
if (strcmp(basename, xlog) < 0)
661-
strcpy(xlog, basename);
749+
/*
750+
* Remove the lowest priority file and add the current one to the
751+
* heap.
752+
*/
753+
arch_file = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
754+
strcpy(arch_file, basename);
755+
binaryheap_add(arch_files->arch_heap, CStringGetDatum(arch_file));
662756
}
663757
}
664758
FreeDir(rldir);
665759

666-
return found;
760+
/* If no files were found, simply return. */
761+
if (arch_files->arch_heap->bh_size == 0)
762+
return false;
763+
764+
/*
765+
* If we didn't fill the heap, we didn't make it a valid one. Do that
766+
* now.
767+
*/
768+
if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
769+
binaryheap_build(arch_files->arch_heap);
770+
771+
/*
772+
* Fill arch_files array with the files to archive in ascending order of
773+
* priority.
774+
*/
775+
arch_files->arch_files_size = arch_files->arch_heap->bh_size;
776+
for (int i = 0; i < arch_files->arch_files_size; i++)
777+
arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
778+
779+
/* Return the highest priority file. */
780+
arch_files->arch_files_size--;
781+
strcpy(xlog, arch_files->arch_files[arch_files->arch_files_size]);
782+
783+
return true;
784+
}
785+
786+
/*
787+
* ready_file_comparator
788+
*
789+
* Compares the archival priority of the given files to archive. If "a"
790+
* has a higher priority than "b", a negative value will be returned. If
791+
* "b" has a higher priority than "a", a positive value will be returned.
792+
* If "a" and "b" have equivalent values, 0 will be returned.
793+
*/
794+
static int
795+
ready_file_comparator(Datum a, Datum b, void *arg)
796+
{
797+
char *a_str = DatumGetCString(a);
798+
char *b_str = DatumGetCString(b);
799+
bool a_history = IsTLHistoryFileName(a_str);
800+
bool b_history = IsTLHistoryFileName(b_str);
801+
802+
/* Timeline history files always have the highest priority. */
803+
if (a_history != b_history)
804+
return a_history ? -1 : 1;
805+
806+
/* Priority is given to older files. */
807+
return strcmp(a_str, b_str);
808+
}
809+
810+
/*
811+
* PgArchForceDirScan
812+
*
813+
* When called, the next call to pgarch_readyXlog() will perform a
814+
* directory scan. This is useful for ensuring that important files such
815+
* as timeline history files are archived as quickly as possible.
816+
*/
817+
void
818+
PgArchForceDirScan(void)
819+
{
820+
SpinLockAcquire(&PgArch->arch_lck);
821+
PgArch->force_dir_scan = true;
822+
SpinLockRelease(&PgArch->arch_lck);
667823
}
668824

669825
/*

src/include/postmaster/pgarch.h

+1
Original file line number | Diff line number | Diff line change
@@ -31,5 +31,6 @@ extern void PgArchShmemInit(void);
3131
extern bool PgArchCanRestart(void);
3232
extern void PgArchiverMain(void) pg_attribute_noreturn();
3333
extern void PgArchWakeup(void);
34+
extern void PgArchForceDirScan(void);
3435

3536
#endif /* _PGARCH_H */

0 commit comments

Comments
 (0)