Skip to content

Commit b840508

Browse files
Add functions to binaryheap for efficient key removal and update.
Previously, binaryheap didn't support updating a key and removing a node in an efficient way. For example, in order to remove a node from the binaryheap, the caller had to pass the node's position within the array that the binaryheap internally has. Removing a node from the binaryheap is done in O(log n) but searching for the key's position is done in O(n). This commit adds a hash table to binaryheap in order to track the position of each nodes in the binaryheap. That way, by using newly added functions such as binaryheap_update_up() etc., both updating a key and removing a node can be done in O(1) on an average and O(log n) in worst case. This is known as the indexed binary heap. The caller can specify to use the indexed binaryheap by passing indexed = true. The current code does not use the new indexing logic, but it will be used by an upcoming patch. Reviewed-by: Vignesh C, Peter Smith, Hayato Kuroda, Ajin Cherian, Tomas Vondra, Shubham Khanna Discussion: https://postgr.es/m/CAD21AoDffo37RC-eUuyHJKVEr017V2YYDLyn1xF_00ofptWbkg%40mail.gmail.com
1 parent bcb14f4 commit b840508

File tree

10 files changed

+232
-14
lines changed

10 files changed

+232
-14
lines changed

src/backend/executor/nodeGatherMerge.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ gather_merge_setup(GatherMergeState *gm_state)
422422
/* Allocate the resources for the merge */
423423
gm_state->gm_heap = binaryheap_allocate(nreaders + 1,
424424
heap_compare_slots,
425+
false,
425426
gm_state);
426427
}
427428

src/backend/executor/nodeMergeAppend.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags)
125125
mergestate->ms_nplans = nplans;
126126

127127
mergestate->ms_slots = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
128-
mergestate->ms_heap = binaryheap_allocate(nplans, heap_compare_slots,
128+
mergestate->ms_heap = binaryheap_allocate(nplans, heap_compare_slots, false,
129129
mergestate);
130130

131131
/*

src/backend/postmaster/pgarch.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ PgArchiverMain(char *startup_data, size_t startup_data_len)
254254

255255
/* Initialize our max-heap for prioritizing files to archive. */
256256
arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
257-
ready_file_comparator, NULL);
257+
ready_file_comparator, false,
258+
NULL);
258259

259260
/* Load the archive_library. */
260261
LoadArchiveLibrary();

src/backend/replication/logical/reorderbuffer.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,7 @@ ReorderBufferIterTXNInit(ReorderBuffer *rb, ReorderBufferTXN *txn,
12941294
/* allocate heap */
12951295
state->heap = binaryheap_allocate(state->nr_txns,
12961296
ReorderBufferIterCompare,
1297+
false,
12971298
state);
12981299

12991300
/* Now that the state fields are initialized, it is safe to return it. */

src/backend/storage/buffer/bufmgr.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3014,6 +3014,7 @@ BufferSync(int flags)
30143014
*/
30153015
ts_heap = binaryheap_allocate(num_spaces,
30163016
ts_ckpt_progress_comparator,
3017+
false,
30173018
NULL);
30183019

30193020
for (i = 0; i < num_spaces; i++)

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4200,6 +4200,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
42004200
/* Set up ready_heap with enough room for all known TocEntrys */
42014201
ready_heap = binaryheap_allocate(AH->tocCount,
42024202
TocEntrySizeCompareBinaryheap,
4203+
false,
42034204
NULL);
42044205

42054206
/*

src/bin/pg_dump/pg_dump_sort.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ TopoSort(DumpableObject **objs,
405405
return true;
406406

407407
/* Create workspace for the above-described heap */
408-
pendingHeap = binaryheap_allocate(numObjs, int_cmp, NULL);
408+
pendingHeap = binaryheap_allocate(numObjs, int_cmp, false, NULL);
409409

410410
/*
411411
* Scan the constraints, and for each item in the input, generate a count

src/common/binaryheap.c

Lines changed: 188 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,30 @@
2222
#ifdef FRONTEND
2323
#include "common/logging.h"
2424
#endif
25+
#include "common/hashfn.h"
2526
#include "lib/binaryheap.h"
2627

28+
/*
29+
* Define parameters for hash table code generation. The interface is *also*
30+
* declared in binaryheap.h (to generate the types, which are externally
31+
* visible).
32+
*/
33+
#define SH_PREFIX bh_nodeidx
34+
#define SH_ELEMENT_TYPE bh_nodeidx_entry
35+
#define SH_KEY_TYPE bh_node_type
36+
#define SH_KEY key
37+
#define SH_HASH_KEY(tb, key) \
38+
hash_bytes((const unsigned char *) &key, sizeof(bh_node_type))
39+
#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(bh_node_type)) == 0)
40+
#define SH_SCOPE extern
41+
#ifdef FRONTEND
42+
#define SH_RAW_ALLOCATOR pg_malloc0
43+
#endif
44+
#define SH_STORE_HASH
45+
#define SH_GET_HASH(tb, a) a->hash
46+
#define SH_DEFINE
47+
#include "lib/simplehash.h"
48+
2749
static void sift_down(binaryheap *heap, int node_off);
2850
static void sift_up(binaryheap *heap, int node_off);
2951

@@ -34,9 +56,14 @@ static void sift_up(binaryheap *heap, int node_off);
3456
* of nodes, and with the heap property defined by the given comparator
3557
* function, which will be invoked with the additional argument specified by
3658
* 'arg'.
59+
*
60+
* If 'indexed' is true, we create a hash table to track each node's
61+
* index in the heap, enabling to perform some operations such as
62+
* binaryheap_remove_node_ptr() etc.
3763
*/
3864
binaryheap *
39-
binaryheap_allocate(int num_nodes, binaryheap_comparator compare, void *arg)
65+
binaryheap_allocate(int num_nodes, binaryheap_comparator compare,
66+
bool indexed, void *arg)
4067
{
4168
binaryheap *heap;
4269

@@ -48,6 +75,17 @@ binaryheap_allocate(int num_nodes, binaryheap_comparator compare, void *arg)
4875
heap->bh_size = 0;
4976
heap->bh_has_heap_property = true;
5077
heap->bh_nodes = (bh_node_type *) palloc(sizeof(bh_node_type) * num_nodes);
78+
heap->bh_nodeidx = NULL;
79+
80+
if (indexed)
81+
{
82+
#ifdef FRONTEND
83+
heap->bh_nodeidx = bh_nodeidx_create(num_nodes, NULL);
84+
#else
85+
heap->bh_nodeidx = bh_nodeidx_create(CurrentMemoryContext, num_nodes,
86+
NULL);
87+
#endif
88+
}
5189

5290
return heap;
5391
}
@@ -63,6 +101,9 @@ binaryheap_reset(binaryheap *heap)
63101
{
64102
heap->bh_size = 0;
65103
heap->bh_has_heap_property = true;
104+
105+
if (binaryheap_indexed(heap))
106+
bh_nodeidx_reset(heap->bh_nodeidx);
66107
}
67108

68109
/*
@@ -73,6 +114,9 @@ binaryheap_reset(binaryheap *heap)
73114
void
74115
binaryheap_free(binaryheap *heap)
75116
{
117+
if (binaryheap_indexed(heap))
118+
bh_nodeidx_destroy(heap->bh_nodeidx);
119+
76120
pfree(heap->bh_nodes);
77121
pfree(heap);
78122
}
@@ -115,6 +159,67 @@ enlarge_node_array(binaryheap *heap)
115159
sizeof(bh_node_type) * heap->bh_space);
116160
}
117161

162+
/*
163+
* Set the given node at the 'index' and track it if required.
164+
*
165+
* Return true if the node's index is already tracked.
166+
*/
167+
static bool
168+
set_node(binaryheap *heap, bh_node_type node, int index)
169+
{
170+
bool found = false;
171+
172+
/* Set the node to the nodes array */
173+
heap->bh_nodes[index] = node;
174+
175+
if (binaryheap_indexed(heap))
176+
{
177+
bh_nodeidx_entry *ent;
178+
179+
/* Keep track of the node index */
180+
ent = bh_nodeidx_insert(heap->bh_nodeidx, node, &found);
181+
ent->index = index;
182+
}
183+
184+
return found;
185+
}
186+
187+
/*
188+
* Remove the node's index from the hash table if the heap is indexed.
189+
*/
190+
static inline void
191+
delete_nodeidx(binaryheap *heap, bh_node_type node)
192+
{
193+
if (binaryheap_indexed(heap))
194+
bh_nodeidx_delete(heap->bh_nodeidx, node);
195+
}
196+
197+
/*
198+
* Replace the existing node at 'idx' with the given 'new_node'. Also
199+
* update their positions accordingly. Note that we assume the new_node's
200+
* position is already tracked if enabled, i.e. the new_node is already
201+
* present in the heap.
202+
*/
203+
static void
204+
replace_node(binaryheap *heap, int index, bh_node_type new_node)
205+
{
206+
bool found PG_USED_FOR_ASSERTS_ONLY;
207+
208+
/* Quick return if not necessary to move */
209+
if (heap->bh_nodes[index] == new_node)
210+
return;
211+
212+
/* Remove the overwritten node's index */
213+
delete_nodeidx(heap, heap->bh_nodes[index]);
214+
215+
/*
216+
* Replace it with the given new node. This node's position must also be
217+
* tracked as we assume to replace the node with the existing node.
218+
*/
219+
found = set_node(heap, new_node, index);
220+
Assert(!binaryheap_indexed(heap) || found);
221+
}
222+
118223
/*
119224
* binaryheap_add_unordered
120225
*
@@ -131,7 +236,7 @@ binaryheap_add_unordered(binaryheap *heap, bh_node_type d)
131236
enlarge_node_array(heap);
132237

133238
heap->bh_has_heap_property = false;
134-
heap->bh_nodes[heap->bh_size] = d;
239+
set_node(heap, d, heap->bh_size);
135240
heap->bh_size++;
136241
}
137242

@@ -164,7 +269,7 @@ binaryheap_add(binaryheap *heap, bh_node_type d)
164269
if (heap->bh_size >= heap->bh_space)
165270
enlarge_node_array(heap);
166271

167-
heap->bh_nodes[heap->bh_size] = d;
272+
set_node(heap, d, heap->bh_size);
168273
heap->bh_size++;
169274
sift_up(heap, heap->bh_size - 1);
170275
}
@@ -205,14 +310,16 @@ binaryheap_remove_first(binaryheap *heap)
205310
if (heap->bh_size == 1)
206311
{
207312
heap->bh_size--;
313+
delete_nodeidx(heap, result);
314+
208315
return result;
209316
}
210317

211318
/*
212319
* Remove the last node, placing it in the vacated root entry, and sift
213320
* the new root node down to its correct position.
214321
*/
215-
heap->bh_nodes[0] = heap->bh_nodes[--heap->bh_size];
322+
replace_node(heap, 0, heap->bh_nodes[--heap->bh_size]);
216323
sift_down(heap, 0);
217324

218325
return result;
@@ -238,7 +345,7 @@ binaryheap_remove_node(binaryheap *heap, int n)
238345
heap->bh_arg);
239346

240347
/* remove the last node, placing it in the vacated entry */
241-
heap->bh_nodes[n] = heap->bh_nodes[heap->bh_size];
348+
replace_node(heap, n, heap->bh_nodes[heap->bh_size]);
242349

243350
/* sift as needed to preserve the heap property */
244351
if (cmp > 0)
@@ -247,6 +354,77 @@ binaryheap_remove_node(binaryheap *heap, int n)
247354
sift_down(heap, n);
248355
}
249356

357+
/*
358+
* binaryheap_remove_node_ptr
359+
*
360+
* Similar to binaryheap_remove_node() but removes the given node. The caller
361+
* must ensure that the given node is in the heap. O(log n) worst case.
362+
*
363+
* This function can be used only if the heap is indexed.
364+
*/
365+
void
366+
binaryheap_remove_node_ptr(binaryheap *heap, bh_node_type d)
367+
{
368+
bh_nodeidx_entry *ent;
369+
370+
Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property);
371+
Assert(binaryheap_indexed(heap));
372+
373+
ent = bh_nodeidx_lookup(heap->bh_nodeidx, d);
374+
Assert(ent);
375+
376+
binaryheap_remove_node(heap, ent->index);
377+
}
378+
379+
/*
380+
* Workhorse for binaryheap_update_up and binaryheap_update_down.
381+
*/
382+
static void
383+
resift_node(binaryheap *heap, bh_node_type node, bool sift_dir_up)
384+
{
385+
bh_nodeidx_entry *ent;
386+
387+
Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property);
388+
Assert(binaryheap_indexed(heap));
389+
390+
ent = bh_nodeidx_lookup(heap->bh_nodeidx, node);
391+
Assert(ent);
392+
Assert(ent->index >= 0 && ent->index < heap->bh_size);
393+
394+
if (sift_dir_up)
395+
sift_up(heap, ent->index);
396+
else
397+
sift_down(heap, ent->index);
398+
}
399+
400+
/*
401+
* binaryheap_update_up
402+
*
403+
* Sift the given node up after the node's key is updated. The caller must
404+
* ensure that the given node is in the heap. O(log n) worst case.
405+
*
406+
* This function can be used only if the heap is indexed.
407+
*/
408+
void
409+
binaryheap_update_up(binaryheap *heap, bh_node_type d)
410+
{
411+
resift_node(heap, d, true);
412+
}
413+
414+
/*
415+
* binaryheap_update_down
416+
*
417+
* Sift the given node down after the node's key is updated. The caller must
418+
* ensure that the given node is in the heap. O(log n) worst case.
419+
*
420+
* This function can be used only if the heap is indexed.
421+
*/
422+
void
423+
binaryheap_update_down(binaryheap *heap, bh_node_type d)
424+
{
425+
resift_node(heap, d, false);
426+
}
427+
250428
/*
251429
* binaryheap_replace_first
252430
*
@@ -259,7 +437,7 @@ binaryheap_replace_first(binaryheap *heap, bh_node_type d)
259437
{
260438
Assert(!binaryheap_empty(heap) && heap->bh_has_heap_property);
261439

262-
heap->bh_nodes[0] = d;
440+
replace_node(heap, 0, d);
263441

264442
if (heap->bh_size > 1)
265443
sift_down(heap, 0);
@@ -301,11 +479,11 @@ sift_up(binaryheap *heap, int node_off)
301479
* Otherwise, swap the parent value with the hole, and go on to check
302480
* the node's new parent.
303481
*/
304-
heap->bh_nodes[node_off] = parent_val;
482+
set_node(heap, parent_val, node_off);
305483
node_off = parent_off;
306484
}
307485
/* Re-fill the hole */
308-
heap->bh_nodes[node_off] = node_val;
486+
set_node(heap, node_val, node_off);
309487
}
310488

311489
/*
@@ -360,9 +538,9 @@ sift_down(binaryheap *heap, int node_off)
360538
* Otherwise, swap the hole with the child that violates the heap
361539
* property; then go on to check its children.
362540
*/
363-
heap->bh_nodes[node_off] = heap->bh_nodes[swap_off];
541+
set_node(heap, heap->bh_nodes[swap_off], node_off);
364542
node_off = swap_off;
365543
}
366544
/* Re-fill the hole */
367-
heap->bh_nodes[node_off] = node_val;
545+
set_node(heap, node_val, node_off);
368546
}

0 commit comments

Comments
 (0)