Skip to content

Commit efb8acc

Browse files
Replace binaryheap + index with pairingheap in reorderbuffer.c
A pairing heap can perform the same operations as the binary heap + index, with as good or better algorithmic complexity, and that's an existing data structure so that we don't need to invent anything new compared to v16. This commit makes the new binaryheap functionality that was added in commits b840508 and bcb14f4 unnecessary, but they will be reverted separately. Remove the optimization to only build and maintain the heap when the amount of memory used is close to the limit, becuase the bookkeeping overhead with the pairing heap seems to be small enough that it doesn't matter in practice. Reported-by: Jeff Davis Author: Heikki Linnakangas Reviewed-by: Michael Paquier, Hayato Kuroda, Masahiko Sawada Discussion: https://postgr.es/m/12747c15811d94efcc5cda72d6b35c80d7bf3443.camel%40j-davis.com
1 parent 9422199 commit efb8acc

File tree

2 files changed

+30
-170
lines changed

2 files changed

+30
-170
lines changed

src/backend/replication/logical/reorderbuffer.c

Lines changed: 23 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,9 @@
6868
* memory gets actually freed.
6969
*
7070
* We use a max-heap with transaction size as the key to efficiently find
71-
* the largest transaction. While the max-heap is empty, we don't update
72-
* the max-heap when updating the memory counter. Therefore, we can get
73-
* the largest transaction in O(N) time, where N is the number of
74-
* transactions including top-level transactions and subtransactions.
75-
*
76-
* We build the max-heap just before selecting the largest transactions
77-
* if the number of transactions being decoded is higher than the threshold,
78-
* MAX_HEAP_TXN_COUNT_THRESHOLD. After building the max-heap, we also
79-
* update the max-heap when updating the memory counter. The intention is
80-
* to efficiently find the largest transaction in O(1) time instead of
81-
* incurring the cost of memory counter updates (O(log N)). Once the number
82-
* of transactions got lower than the threshold, we reset the max-heap
83-
* (refer to ReorderBufferMaybeResetMaxHeap() for details).
71+
* the largest transaction. We update the max-heap whenever the memory
72+
* counter is updated; however transactions with size 0 are not stored in
73+
* the heap, because they have no changes to evict.
8474
*
8575
* We still rely on max_changes_in_memory when loading serialized changes
8676
* back into memory. At that point we can't use the memory limit directly
@@ -122,23 +112,6 @@
122112
#include "utils/rel.h"
123113
#include "utils/relfilenumbermap.h"
124114

125-
/*
126-
* Threshold of the total number of top-level and sub transactions that
127-
* controls whether we use the max-heap for tracking their sizes. Although
128-
* using the max-heap to select the largest transaction is effective when
129-
* there are many transactions being decoded, maintaining the max-heap while
130-
* updating the memory statistics can be costly. Therefore, we use
131-
* MaxConnections as the threshold so that we use the max-heap only when
132-
* using subtransactions.
133-
*/
134-
#define MAX_HEAP_TXN_COUNT_THRESHOLD MaxConnections
135-
136-
/*
137-
* A macro to check if the max-heap is ready to use and needs to be updated
138-
* accordingly.
139-
*/
140-
#define ReorderBufferMaxHeapIsReady(rb) !binaryheap_empty((rb)->txn_heap)
141-
142115
/* entry for a hash table we use to map from xid to our transaction state */
143116
typedef struct ReorderBufferTXNByIdEnt
144117
{
@@ -290,9 +263,7 @@ static void ReorderBufferTruncateTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
290263
static void ReorderBufferCleanupSerializedTXNs(const char *slotname);
291264
static void ReorderBufferSerializedPath(char *path, ReplicationSlot *slot,
292265
TransactionId xid, XLogSegNo segno);
293-
static void ReorderBufferBuildMaxHeap(ReorderBuffer *rb);
294-
static void ReorderBufferMaybeResetMaxHeap(ReorderBuffer *rb);
295-
static int ReorderBufferTXNSizeCompare(Datum a, Datum b, void *arg);
266+
static int ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg);
296267

297268
static void ReorderBufferFreeSnap(ReorderBuffer *rb, Snapshot snap);
298269
static Snapshot ReorderBufferCopySnap(ReorderBuffer *rb, Snapshot orig_snap,
@@ -390,16 +361,8 @@ ReorderBufferAllocate(void)
390361
buffer->outbufsize = 0;
391362
buffer->size = 0;
392363

393-
/*
394-
* The binaryheap is indexed for faster manipulations.
395-
*
396-
* We allocate the initial heap size greater than
397-
* MAX_HEAP_TXN_COUNT_THRESHOLD because the txn_heap will not be used
398-
* until the threshold is exceeded.
399-
*/
400-
buffer->txn_heap = binaryheap_allocate(MAX_HEAP_TXN_COUNT_THRESHOLD * 2,
401-
ReorderBufferTXNSizeCompare,
402-
true, NULL);
364+
/* txn_heap is ordered by transaction size */
365+
buffer->txn_heap = pairingheap_allocate(ReorderBufferTXNSizeCompare, NULL);
403366

404367
buffer->spillTxns = 0;
405368
buffer->spillCount = 0;
@@ -1637,12 +1600,6 @@ ReorderBufferCleanupTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
16371600

16381601
/* deallocate */
16391602
ReorderBufferReturnTXN(rb, txn);
1640-
1641-
/*
1642-
* After cleaning up one transaction, the number of transactions might get
1643-
* lower than the threshold for the max-heap.
1644-
*/
1645-
ReorderBufferMaybeResetMaxHeap(rb);
16461603
}
16471604

16481605
/*
@@ -3265,20 +3222,18 @@ ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb,
32653222

32663223
if (addition)
32673224
{
3225+
Size oldsize = txn->size;
3226+
32683227
txn->size += sz;
32693228
rb->size += sz;
32703229

32713230
/* Update the total size in the top transaction. */
32723231
toptxn->total_size += sz;
32733232

3274-
/* Update the max-heap as well if necessary */
3275-
if (ReorderBufferMaxHeapIsReady(rb))
3276-
{
3277-
if ((txn->size - sz) == 0)
3278-
binaryheap_add(rb->txn_heap, PointerGetDatum(txn));
3279-
else
3280-
binaryheap_update_up(rb->txn_heap, PointerGetDatum(txn));
3281-
}
3233+
/* Update the max-heap */
3234+
if (oldsize != 0)
3235+
pairingheap_remove(rb->txn_heap, &txn->txn_node);
3236+
pairingheap_add(rb->txn_heap, &txn->txn_node);
32823237
}
32833238
else
32843239
{
@@ -3289,14 +3244,10 @@ ReorderBufferChangeMemoryUpdate(ReorderBuffer *rb,
32893244
/* Update the total size in the top transaction. */
32903245
toptxn->total_size -= sz;
32913246

3292-
/* Update the max-heap as well if necessary */
3293-
if (ReorderBufferMaxHeapIsReady(rb))
3294-
{
3295-
if (txn->size == 0)
3296-
binaryheap_remove_node_ptr(rb->txn_heap, PointerGetDatum(txn));
3297-
else
3298-
binaryheap_update_down(rb->txn_heap, PointerGetDatum(txn));
3299-
}
3247+
/* Update the max-heap */
3248+
pairingheap_remove(rb->txn_heap, &txn->txn_node);
3249+
if (txn->size != 0)
3250+
pairingheap_add(rb->txn_heap, &txn->txn_node);
33003251
}
33013252

33023253
Assert(txn->size <= rb->size);
@@ -3555,10 +3506,10 @@ ReorderBufferSerializeReserve(ReorderBuffer *rb, Size sz)
35553506

35563507
/* Compare two transactions by size */
35573508
static int
3558-
ReorderBufferTXNSizeCompare(Datum a, Datum b, void *arg)
3509+
ReorderBufferTXNSizeCompare(const pairingheap_node *a, const pairingheap_node *b, void *arg)
35593510
{
3560-
ReorderBufferTXN *ta = (ReorderBufferTXN *) DatumGetPointer(a);
3561-
ReorderBufferTXN *tb = (ReorderBufferTXN *) DatumGetPointer(b);
3511+
const ReorderBufferTXN *ta = pairingheap_const_container(ReorderBufferTXN, txn_node, a);
3512+
const ReorderBufferTXN *tb = pairingheap_const_container(ReorderBufferTXN, txn_node, b);
35623513

35633514
if (ta->size < tb->size)
35643515
return -1;
@@ -3568,106 +3519,16 @@ ReorderBufferTXNSizeCompare(Datum a, Datum b, void *arg)
35683519
}
35693520

35703521
/*
3571-
* Build the max-heap. The heap assembly step is deferred until the end, for
3572-
* efficiency.
3573-
*/
3574-
static void
3575-
ReorderBufferBuildMaxHeap(ReorderBuffer *rb)
3576-
{
3577-
HASH_SEQ_STATUS hash_seq;
3578-
ReorderBufferTXNByIdEnt *ent;
3579-
3580-
Assert(binaryheap_empty(rb->txn_heap));
3581-
3582-
hash_seq_init(&hash_seq, rb->by_txn);
3583-
while ((ent = hash_seq_search(&hash_seq)) != NULL)
3584-
{
3585-
ReorderBufferTXN *txn = ent->txn;
3586-
3587-
if (txn->size == 0)
3588-
continue;
3589-
3590-
binaryheap_add_unordered(rb->txn_heap, PointerGetDatum(txn));
3591-
}
3592-
3593-
binaryheap_build(rb->txn_heap);
3594-
}
3595-
3596-
/*
3597-
* Reset the max-heap if the number of transactions got lower than the
3598-
* threshold.
3599-
*/
3600-
static void
3601-
ReorderBufferMaybeResetMaxHeap(ReorderBuffer *rb)
3602-
{
3603-
/*
3604-
* If we add and remove transactions right around the threshold, we could
3605-
* easily end up "thrashing". To avoid it, we adapt 10% of transactions to
3606-
* reset the max-heap.
3607-
*/
3608-
if (ReorderBufferMaxHeapIsReady(rb) &&
3609-
binaryheap_size(rb->txn_heap) < MAX_HEAP_TXN_COUNT_THRESHOLD * 0.9)
3610-
binaryheap_reset(rb->txn_heap);
3611-
}
3612-
3613-
/*
3614-
* Find the largest transaction (toplevel or subxact) to evict (spill to disk)
3615-
* by doing a linear search or using the max-heap depending on the number of
3616-
* transactions in ReorderBuffer. Refer to the comments atop this file for the
3617-
* algorithm details.
3522+
* Find the largest transaction (toplevel or subxact) to evict (spill to disk).
36183523
*/
36193524
static ReorderBufferTXN *
36203525
ReorderBufferLargestTXN(ReorderBuffer *rb)
36213526
{
3622-
ReorderBufferTXN *largest = NULL;
3623-
3624-
if (!ReorderBufferMaxHeapIsReady(rb))
3625-
{
3626-
/*
3627-
* If the number of transactions are small, we scan all transactions
3628-
* being decoded to get the largest transaction. This saves the cost
3629-
* of building a max-heap with a small number of transactions.
3630-
*/
3631-
if (hash_get_num_entries(rb->by_txn) < MAX_HEAP_TXN_COUNT_THRESHOLD)
3632-
{
3633-
HASH_SEQ_STATUS hash_seq;
3634-
ReorderBufferTXNByIdEnt *ent;
3635-
3636-
hash_seq_init(&hash_seq, rb->by_txn);
3637-
while ((ent = hash_seq_search(&hash_seq)) != NULL)
3638-
{
3639-
ReorderBufferTXN *txn = ent->txn;
3640-
3641-
/* if the current transaction is larger, remember it */
3642-
if ((!largest) || (txn->size > largest->size))
3643-
largest = txn;
3644-
}
3645-
3646-
Assert(largest);
3647-
}
3648-
else
3649-
{
3650-
/*
3651-
* There are a large number of transactions in ReorderBuffer. We
3652-
* build the max-heap for efficiently selecting the largest
3653-
* transactions.
3654-
*/
3655-
ReorderBufferBuildMaxHeap(rb);
3656-
3657-
/*
3658-
* The max-heap is ready now. We remain the max-heap at least
3659-
* until we free up enough transactions to bring the total memory
3660-
* usage below the limit. The largest transaction is selected
3661-
* below.
3662-
*/
3663-
Assert(ReorderBufferMaxHeapIsReady(rb));
3664-
}
3665-
}
3527+
ReorderBufferTXN *largest;
36663528

36673529
/* Get the largest transaction from the max-heap */
3668-
if (ReorderBufferMaxHeapIsReady(rb))
3669-
largest = (ReorderBufferTXN *)
3670-
DatumGetPointer(binaryheap_first(rb->txn_heap));
3530+
largest = pairingheap_container(ReorderBufferTXN, txn_node,
3531+
pairingheap_first(rb->txn_heap));
36713532

36723533
Assert(largest);
36733534
Assert(largest->size > 0);
@@ -3812,12 +3673,6 @@ ReorderBufferCheckMemoryLimit(ReorderBuffer *rb)
38123673
/* We must be under the memory limit now. */
38133674
Assert(rb->size < logical_decoding_work_mem * 1024L);
38143675

3815-
/*
3816-
* After evicting some transactions, the number of transactions might get
3817-
* lower than the threshold for the max-heap.
3818-
*/
3819-
ReorderBufferMaybeResetMaxHeap(rb);
3820-
38213676
}
38223677

38233678
/*

src/include/replication/reorderbuffer.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
#define REORDERBUFFER_H
1111

1212
#include "access/htup_details.h"
13-
#include "lib/binaryheap.h"
1413
#include "lib/ilist.h"
14+
#include "lib/pairingheap.h"
1515
#include "storage/sinval.h"
1616
#include "utils/hsearch.h"
1717
#include "utils/relcache.h"
@@ -402,6 +402,11 @@ typedef struct ReorderBufferTXN
402402
*/
403403
dlist_node catchange_node;
404404

405+
/*
406+
* A node in txn_heap
407+
*/
408+
pairingheap_node txn_node;
409+
405410
/*
406411
* Size of this transaction (changes currently in memory, in bytes).
407412
*/
@@ -633,7 +638,7 @@ struct ReorderBuffer
633638
Size size;
634639

635640
/* Max-heap for sizes of all top-level and sub transactions */
636-
binaryheap *txn_heap;
641+
pairingheap *txn_heap;
637642

638643
/*
639644
* Statistics about transactions spilled to disk.

0 commit comments

Comments
 (0)