From 909eebf27b9e6aaa78fb3338f7d8fbc7fa174247 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 4 Apr 2022 14:32:52 -0700 Subject: [PATCH] dshash: revise sequential scan support. The previous coding of dshash_seq_next(), on the first call, accessed status->hash_table->size_log2 without holding a partition lock and without guaranteeing that ensure_valid_bucket_pointers() had ever been called. That oversight turns out to not have immediately visible effects, because bucket 0 is always in partition 0, and ensure_valid_bucket_pointers() was called after acquiring the partition lock. However, PARTITION_FOR_BUCKET_INDEX() with a size_log2 of 0 ends up triggering formally undefined behaviour. Simplify by accessing partition 0, without using PARTITION_FOR_BUCKET_INDEX(). While at it, remove dshash_get_current(), there is no convincing use case. Also polish a few comments. Author: Andres Freund Reviewed-By: Thomas Munro Discussion: https://postgr.es/m/CA+hUKGL9hY_VY=+oUK+Gc1iSRx-Ls5qeYJ6q=dQVZnT3R63Taw@mail.gmail.com --- src/backend/lib/dshash.c | 58 +++++++++++++++++++--------------------- src/include/lib/dshash.h | 1 - 2 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/backend/lib/dshash.c b/src/backend/lib/dshash.c index 84a9db47c7..1b94a76e43 100644 --- a/src/backend/lib/dshash.c +++ b/src/backend/lib/dshash.c @@ -601,13 +601,12 @@ dshash_memhash(const void *v, size_t size, void *arg) } /* - * dshash_seq_init/_next/_term - * Sequentially scan through dshash table and return all the - * elements one by one, return NULL when no more. + * Sequentially scan through dshash table and return all the elements one by + * one, return NULL when all elements have been returned. * - * dshash_seq_term should always be called when a scan finished. - * The caller may delete returned elements midst of a scan by using - * dshash_delete_current(). exclusive must be true to delete elements. + * dshash_seq_term needs to be called when a scan finished. The caller may + * delete returned elements midst of a scan by using dshash_delete_current() + * if exclusive = true. */ void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, @@ -625,34 +624,38 @@ dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, /* * Returns the next element. * - * Returned elements are locked and the caller must not explicitly release - * it. It is released at the next call to dshash_next(). + * Returned elements are locked and the caller may not release the lock. It is + * released by future calls to dshash_seq_next() or dshash_seq_term(). */ void * dshash_seq_next(dshash_seq_status *status) { dsa_pointer next_item_pointer; - if (status->curitem == NULL) + /* + * Not yet holding any partition locks. Need to determine the size of the + * hash table, it could have been resized since we were looking + * last. Since we iterate in partition order, we can start by + * unconditionally lock partition 0. + * + * Once we hold the lock, no resizing can happen until the scan ends. So + * we don't need to repeatedly call ensure_valid_bucket_pointers(). + */ + if (status->curpartition == -1) { - int partition; - Assert(status->curbucket == 0); Assert(!status->hash_table->find_locked); - /* first shot. grab the first item. */ - partition = - PARTITION_FOR_BUCKET_INDEX(status->curbucket, - status->hash_table->size_log2); - LWLockAcquire(PARTITION_LOCK(status->hash_table, partition), - status->exclusive ? LW_EXCLUSIVE : LW_SHARED); - status->curpartition = partition; + status->curpartition = 0; + + LWLockAcquire(PARTITION_LOCK(status->hash_table, + status->curpartition), + status->exclusive ? LW_EXCLUSIVE : LW_SHARED); - /* resize doesn't happen from now until seq scan ends */ - status->nbuckets = - NUM_BUCKETS(status->hash_table->control->size_log2); ensure_valid_bucket_pointers(status->hash_table); + status->nbuckets = + NUM_BUCKETS(status->hash_table->control->size_log2); next_item_pointer = status->hash_table->buckets[status->curbucket]; } else @@ -714,7 +717,7 @@ dshash_seq_next(dshash_seq_status *status) /* * Terminates the seqscan and release all locks. * - * Should be always called when finishing or exiting a seqscan. + * Needs to be called after finishing or when exiting a seqscan. */ void dshash_seq_term(dshash_seq_status *status) @@ -726,7 +729,9 @@ dshash_seq_term(dshash_seq_status *status) LWLockRelease(PARTITION_LOCK(status->hash_table, status->curpartition)); } -/* Remove the current entry while a seq scan. */ +/* + * Remove the current entry of the seq scan. + */ void dshash_delete_current(dshash_seq_status *status) { @@ -746,13 +751,6 @@ dshash_delete_current(dshash_seq_status *status) delete_item(hash_table, item); } -/* Get the current entry while a seq scan. */ -void * -dshash_get_current(dshash_seq_status *status) -{ - return ENTRY_FROM_ITEM(status->curitem); -} - /* * Print debugging information about the internal state of the hash table to * stderr. The caller must hold no partition locks. diff --git a/src/include/lib/dshash.h b/src/include/lib/dshash.h index caeb60ad72..28f8db2eab 100644 --- a/src/include/lib/dshash.h +++ b/src/include/lib/dshash.h @@ -101,7 +101,6 @@ extern void dshash_seq_init(dshash_seq_status *status, dshash_table *hash_table, extern void *dshash_seq_next(dshash_seq_status *status); extern void dshash_seq_term(dshash_seq_status *status); extern void dshash_delete_current(dshash_seq_status *status); -extern void *dshash_get_current(dshash_seq_status *status); /* Convenience hash and compare functions wrapping memcmp and tag_hash. */ extern int dshash_memcmp(const void *a, const void *b, size_t size, void *arg);