Skip to content

Commit c4a153d

Browse files
committed
Improve heuristics for compressing the KnownAssignedXids array.
Previously, we'd compress only when the active range of array entries reached Max(4 * PROCARRAY_MAXPROCS, 2 * pArray->numKnownAssignedXids). If max_connections is large, the first term could result in not compressing for a long time, resulting in much wastage of cycles in hot-standby backends scanning the array to take snapshots. Get rid of that term, and just bound it to 2 * pArray->numKnownAssignedXids. That however creates the opposite risk, that we might spend too much effort compressing. Hence, consider compressing only once every 128 commit records. (This frequency was chosen by benchmarking. While we only tried one benchmark scenario, the results seem stable over a fairly wide range of frequencies.) Also, force compression when processing RecoveryInfo WAL records (which should be infrequent); the old code could perform compression then, but would do so only after the same array-range check as for the transaction-commit path. Also, opportunistically run compression if the startup process is about to wait for WAL, though not oftener than once a second. This should prevent cases where we waste lots of time by leaving the array not-compressed for long intervals due to low WAL traffic. Lastly, add a simple check to keep us from uselessly compressing when the array storage is already compact. Back-patch, as the performance problem is worse in pre-v14 branches than in HEAD. Simon Riggs and Michail Nikolaev, with help from Tom Lane and Andres Freund. Discussion: https://postgr.es/m/CALdSSPgahNUD_=pB_j=1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw@mail.gmail.com
1 parent bb8d48c commit c4a153d

File tree

3 files changed

+110
-30
lines changed

3 files changed

+110
-30
lines changed

src/backend/access/transam/xlog.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12301,6 +12301,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1230112301
wait_time = wal_retrieve_retry_interval -
1230212302
TimestampDifferenceMilliseconds(last_fail_time, now);
1230312303

12304+
/* Do background tasks that might benefit us later. */
12305+
KnownAssignedTransactionIdsIdleMaintenance();
12306+
1230412307
(void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
1230512308
WL_LATCH_SET | WL_TIMEOUT |
1230612309
WL_EXIT_ON_PM_DEATH,
@@ -12498,6 +12501,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
1249812501
streaming_reply_sent = true;
1249912502
}
1250012503

12504+
/* Do any background tasks that might benefit us later. */
12505+
KnownAssignedTransactionIdsIdleMaintenance();
12506+
1250112507
/*
1250212508
* Wait for more WAL to arrive. Time out after 5 seconds
1250312509
* to react to a trigger file promptly and to check if the

src/backend/storage/ipc/procarray.c

Lines changed: 103 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ static ProcArrayStruct *procArray;
101101
static PGPROC *allProcs;
102102
static PGXACT *allPgXact;
103103

104+
/*
105+
* Reason codes for KnownAssignedXidsCompress().
106+
*/
107+
typedef enum KAXCompressReason
108+
{
109+
KAX_NO_SPACE, /* need to free up space at array end */
110+
KAX_PRUNE, /* we just pruned old entries */
111+
KAX_TRANSACTION_END, /* we just committed/removed some XIDs */
112+
KAX_STARTUP_PROCESS_IDLE /* startup process is about to sleep */
113+
} KAXCompressReason;
114+
104115
/*
105116
* Cache to reduce overhead of repeated calls to TransactionIdIsInProgress()
106117
*/
@@ -163,7 +174,7 @@ static bool HaveVirtualXIDsDelayingChkptGuts(VirtualTransactionId *vxids,
163174
int nvxids, int type);
164175

165176
/* Primitives for KnownAssignedXids array handling for standby */
166-
static void KnownAssignedXidsCompress(bool force);
177+
static void KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock);
167178
static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
168179
bool exclusive_lock);
169180
static bool KnownAssignedXidsSearch(TransactionId xid, bool remove);
@@ -3415,6 +3426,17 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
34153426
LWLockRelease(ProcArrayLock);
34163427
}
34173428

3429+
/*
3430+
* KnownAssignedTransactionIdsIdleMaintenance
3431+
* Opportunistically do maintenance work when the startup process
3432+
* is about to go idle.
3433+
*/
3434+
void
3435+
KnownAssignedTransactionIdsIdleMaintenance(void)
3436+
{
3437+
KnownAssignedXidsCompress(KAX_STARTUP_PROCESS_IDLE, false);
3438+
}
3439+
34183440

34193441
/*
34203442
* Private module functions to manipulate KnownAssignedXids
@@ -3497,50 +3519,101 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
34973519
* so there is an optimal point for any workload mix. We use a heuristic to
34983520
* decide when to compress the array, though trimming also helps reduce
34993521
* frequency of compressing. The heuristic requires us to track the number of
3500-
* currently valid XIDs in the array.
3522+
* currently valid XIDs in the array (N). Except in special cases, we'll
3523+
* compress when S >= 2N. Bounding S at 2N in turn bounds the time for
3524+
* taking a snapshot to be O(N), which it would have to be anyway.
35013525
*/
35023526

35033527

35043528
/*
35053529
* Compress KnownAssignedXids by shifting valid data down to the start of the
35063530
* array, removing any gaps.
35073531
*
3508-
* A compression step is forced if "force" is true, otherwise we do it
3509-
* only if a heuristic indicates it's a good time to do it.
3532+
* A compression step is forced if "reason" is KAX_NO_SPACE, otherwise
3533+
* we do it only if a heuristic indicates it's a good time to do it.
35103534
*
3511-
* Caller must hold ProcArrayLock in exclusive mode.
3535+
* Compression requires holding ProcArrayLock in exclusive mode.
3536+
* Caller must pass haveLock = true if it already holds the lock.
35123537
*/
35133538
static void
3514-
KnownAssignedXidsCompress(bool force)
3539+
KnownAssignedXidsCompress(KAXCompressReason reason, bool haveLock)
35153540
{
35163541
ProcArrayStruct *pArray = procArray;
35173542
int head,
3518-
tail;
3543+
tail,
3544+
nelements;
35193545
int compress_index;
35203546
int i;
35213547

3522-
/* no spinlock required since we hold ProcArrayLock exclusively */
3548+
/* Counters for compression heuristics */
3549+
static unsigned int transactionEndsCounter;
3550+
static TimestampTz lastCompressTs;
3551+
3552+
/* Tuning constants */
3553+
#define KAX_COMPRESS_FREQUENCY 128 /* in transactions */
3554+
#define KAX_COMPRESS_IDLE_INTERVAL 1000 /* in ms */
3555+
3556+
/*
3557+
* Since only the startup process modifies the head/tail pointers, we
3558+
* don't need a lock to read them here.
3559+
*/
35233560
head = pArray->headKnownAssignedXids;
35243561
tail = pArray->tailKnownAssignedXids;
3562+
nelements = head - tail;
35253563

3526-
if (!force)
3564+
/*
3565+
* If we can choose whether to compress, use a heuristic to avoid
3566+
* compressing too often or not often enough. "Compress" here simply
3567+
* means moving the values to the beginning of the array, so it is not as
3568+
* complex or costly as typical data compression algorithms.
3569+
*/
3570+
if (nelements == pArray->numKnownAssignedXids)
35273571
{
35283572
/*
3529-
* If we can choose how much to compress, use a heuristic to avoid
3530-
* compressing too often or not often enough.
3531-
*
3532-
* Heuristic is if we have a large enough current spread and less than
3533-
* 50% of the elements are currently in use, then compress. This
3534-
* should ensure we compress fairly infrequently. We could compress
3535-
* less often though the virtual array would spread out more and
3536-
* snapshots would become more expensive.
3573+
* When there are no gaps between head and tail, don't bother to
3574+
* compress, except in the KAX_NO_SPACE case where we must compress to
3575+
* create some space after the head.
35373576
*/
3538-
int nelements = head - tail;
3577+
if (reason != KAX_NO_SPACE)
3578+
return;
3579+
}
3580+
else if (reason == KAX_TRANSACTION_END)
3581+
{
3582+
/*
3583+
* Consider compressing only once every so many commits. Frequency
3584+
* determined by benchmarks.
3585+
*/
3586+
if ((transactionEndsCounter++) % KAX_COMPRESS_FREQUENCY != 0)
3587+
return;
35393588

3540-
if (nelements < 4 * PROCARRAY_MAXPROCS ||
3541-
nelements < 2 * pArray->numKnownAssignedXids)
3589+
/*
3590+
* Furthermore, compress only if the used part of the array is less
3591+
* than 50% full (see comments above).
3592+
*/
3593+
if (nelements < 2 * pArray->numKnownAssignedXids)
35423594
return;
35433595
}
3596+
else if (reason == KAX_STARTUP_PROCESS_IDLE)
3597+
{
3598+
/*
3599+
* We're about to go idle for lack of new WAL, so we might as well
3600+
* compress. But not too often, to avoid ProcArray lock contention
3601+
* with readers.
3602+
*/
3603+
if (lastCompressTs != 0)
3604+
{
3605+
TimestampTz compress_after;
3606+
3607+
compress_after = TimestampTzPlusMilliseconds(lastCompressTs,
3608+
KAX_COMPRESS_IDLE_INTERVAL);
3609+
if (GetCurrentTimestamp() < compress_after)
3610+
return;
3611+
}
3612+
}
3613+
3614+
/* Need to compress, so get the lock if we don't have it. */
3615+
if (!haveLock)
3616+
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
35443617

35453618
/*
35463619
* We compress the array by reading the valid values from tail to head,
@@ -3556,9 +3629,16 @@ KnownAssignedXidsCompress(bool force)
35563629
compress_index++;
35573630
}
35583631
}
3632+
Assert(compress_index == pArray->numKnownAssignedXids);
35593633

35603634
pArray->tailKnownAssignedXids = 0;
35613635
pArray->headKnownAssignedXids = compress_index;
3636+
3637+
if (!haveLock)
3638+
LWLockRelease(ProcArrayLock);
3639+
3640+
/* Update timestamp for maintenance. No need to hold lock for this. */
3641+
lastCompressTs = GetCurrentTimestamp();
35623642
}
35633643

35643644
/*
@@ -3630,18 +3710,11 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
36303710
*/
36313711
if (head + nxids > pArray->maxKnownAssignedXids)
36323712
{
3633-
/* must hold lock to compress */
3634-
if (!exclusive_lock)
3635-
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
3636-
3637-
KnownAssignedXidsCompress(true);
3713+
KnownAssignedXidsCompress(KAX_NO_SPACE, exclusive_lock);
36383714

36393715
head = pArray->headKnownAssignedXids;
36403716
/* note: we no longer care about the tail pointer */
36413717

3642-
if (!exclusive_lock)
3643-
LWLockRelease(ProcArrayLock);
3644-
36453718
/*
36463719
* If it still won't fit then we're out of memory
36473720
*/
@@ -3835,7 +3908,7 @@ KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
38353908
KnownAssignedXidsRemove(subxids[i]);
38363909

38373910
/* Opportunistically compress the array */
3838-
KnownAssignedXidsCompress(false);
3911+
KnownAssignedXidsCompress(KAX_TRANSACTION_END, true);
38393912
}
38403913

38413914
/*
@@ -3910,7 +3983,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
39103983
}
39113984

39123985
/* Opportunistically compress the array */
3913-
KnownAssignedXidsCompress(false);
3986+
KnownAssignedXidsCompress(KAX_PRUNE, true);
39143987
}
39153988

39163989
/*

src/include/storage/procarray.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
7474
TransactionId max_xid);
7575
extern void ExpireAllKnownAssignedTransactionIds(void);
7676
extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
77+
extern void KnownAssignedTransactionIdsIdleMaintenance(void);
7778

7879
extern int GetMaxSnapshotXidCount(void);
7980
extern int GetMaxSnapshotSubxidCount(void);

0 commit comments

Comments
 (0)