Skip to content

Commit 909eebf

Browse files
committed
dshash: revise sequential scan support.
The previous coding of dshash_seq_next(), on the first call, accessed status->hash_table->size_log2 without holding a partition lock and without guaranteeing that ensure_valid_bucket_pointers() had ever been called. That oversight turns out to not have immediately visible effects, because bucket 0 is always in partition 0, and ensure_valid_bucket_pointers() was called after acquiring the partition lock. However, PARTITION_FOR_BUCKET_INDEX() with a size_log2 of 0 ends up triggering formally undefined behaviour. Simplify by accessing partition 0, without using PARTITION_FOR_BUCKET_INDEX(). While at it, remove dshash_get_current(), there is no convincing use case. Also polish a few comments. Author: Andres Freund <andres@anarazel.de> Reviewed-By: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/CA+hUKGL9hY_VY=+oUK+Gc1iSRx-Ls5qeYJ6q=dQVZnT3R63Taw@mail.gmail.com
1 parent 55e566f commit 909eebf

File tree

2 files changed

+27
-30
lines changed

2 files changed

+27
-30
lines changed

src/backend/lib/dshash.c

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -601,13 +601,12 @@ dshash_memhash(const void *v, size_t size, void *arg)
601601
}
602602

603603
/*
604-
* dshash_seq_init/_next/_term
605-
* Sequentially scan through dshash table and return all the
606-
* elements one by one, return NULL when no more.
604+
* Sequentially scan through dshash table and return all the elements one by
605+
* one, return NULL when all elements have been returned.
607606
*
608-
* dshash_seq_term should always be called when a scan finished.
609-
* The caller may delete returned elements midst of a scan by using
610-
* dshash_delete_current(). exclusive must be true to delete elements.
607+
* dshash_seq_term needs to be called when a scan finished. The caller may
608+
* delete returned elements midst of a scan by using dshash_delete_current()
609+
* if exclusive = true.
611610
*/
612611
void
613612
dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
@@ -625,34 +624,38 @@ dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
625624
/*
626625
* Returns the next element.
627626
*
628-
* Returned elements are locked and the caller must not explicitly release
629-
* it. It is released at the next call to dshash_next().
627+
* Returned elements are locked and the caller may not release the lock. It is
628+
* released by future calls to dshash_seq_next() or dshash_seq_term().
630629
*/
631630
void *
632631
dshash_seq_next(dshash_seq_status *status)
633632
{
634633
dsa_pointer next_item_pointer;
635634

636-
if (status->curitem == NULL)
635+
/*
636+
* Not yet holding any partition locks. Need to determine the size of the
637+
* hash table, it could have been resized since we were looking
638+
* last. Since we iterate in partition order, we can start by
639+
* unconditionally lock partition 0.
640+
*
641+
* Once we hold the lock, no resizing can happen until the scan ends. So
642+
* we don't need to repeatedly call ensure_valid_bucket_pointers().
643+
*/
644+
if (status->curpartition == -1)
637645
{
638-
int partition;
639-
640646
Assert(status->curbucket == 0);
641647
Assert(!status->hash_table->find_locked);
642648

643-
/* first shot. grab the first item. */
644-
partition =
645-
PARTITION_FOR_BUCKET_INDEX(status->curbucket,
646-
status->hash_table->size_log2);
647-
LWLockAcquire(PARTITION_LOCK(status->hash_table, partition),
649+
status->curpartition = 0;
650+
651+
LWLockAcquire(PARTITION_LOCK(status->hash_table,
652+
status->curpartition),
648653
status->exclusive ? LW_EXCLUSIVE : LW_SHARED);
649-
status->curpartition = partition;
650654

651-
/* resize doesn't happen from now until seq scan ends */
652-
status->nbuckets =
653-
NUM_BUCKETS(status->hash_table->control->size_log2);
654655
ensure_valid_bucket_pointers(status->hash_table);
655656

657+
status->nbuckets =
658+
NUM_BUCKETS(status->hash_table->control->size_log2);
656659
next_item_pointer = status->hash_table->buckets[status->curbucket];
657660
}
658661
else
@@ -714,7 +717,7 @@ dshash_seq_next(dshash_seq_status *status)
714717
/*
715718
* Terminates the seqscan and release all locks.
716719
*
717-
* Should be always called when finishing or exiting a seqscan.
720+
* Needs to be called after finishing or when exiting a seqscan.
718721
*/
719722
void
720723
dshash_seq_term(dshash_seq_status *status)
@@ -726,7 +729,9 @@ dshash_seq_term(dshash_seq_status *status)
726729
LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition));
727730
}
728731

729-
/* Remove the current entry while a seq scan. */
732+
/*
733+
* Remove the current entry of the seq scan.
734+
*/
730735
void
731736
dshash_delete_current(dshash_seq_status *status)
732737
{
@@ -746,13 +751,6 @@ dshash_delete_current(dshash_seq_status *status)
746751
delete_item(hash_table, item);
747752
}
748753

749-
/* Get the current entry while a seq scan. */
750-
void *
751-
dshash_get_current(dshash_seq_status *status)
752-
{
753-
return ENTRY_FROM_ITEM(status->curitem);
754-
}
755-
756754
/*
757755
* Print debugging information about the internal state of the hash table to
758756
* stderr. The caller must hold no partition locks.

src/include/lib/dshash.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ extern void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table,
101101
extern void *dshash_seq_next(dshash_seq_status *status);
102102
extern void dshash_seq_term(dshash_seq_status *status);
103103
extern void dshash_delete_current(dshash_seq_status *status);
104-
extern void *dshash_get_current(dshash_seq_status *status);
105104

106105
/* Convenience hash and compare functions wrapping memcmp and tag_hash. */
107106
extern int dshash_memcmp(const void *a, const void *b, size_t size, void *arg);

0 commit comments

Comments
 (0)