diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 19695ee102..30c82e191c 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -22,6 +22,7 @@
 #include "access/relscan.h"
 #include "catalog/index.h"
 #include "commands/vacuum.h"
+#include "miscadmin.h"
 #include "optimizer/plancat.h"
 #include "utils/index_selfuncs.h"
 #include "utils/rel.h"
@@ -97,6 +98,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	double		reltuples;
 	double		allvisfrac;
 	uint32		num_buckets;
+	long		sort_threshold;
 	HashBuildState buildstate;
 
 	/*
@@ -120,12 +122,24 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	 * then we'll thrash horribly.  To prevent that scenario, we can sort the
 	 * tuples by (expected) bucket number.  However, such a sort is useless
 	 * overhead when the index does fit in RAM.  We choose to sort if the
-	 * initial index size exceeds NBuffers.
+	 * initial index size exceeds maintenance_work_mem, or the number of
+	 * buffers usable for the index, whichever is less.  (Limiting by the
+	 * number of buffers should reduce thrashing between PG buffers and kernel
+	 * buffers, which seems useful even if no physical I/O results.  Limiting
+	 * by maintenance_work_mem is useful to allow easy testing of the sort
+	 * code path, and may be useful to DBAs as an additional control knob.)
 	 *
 	 * NOTE: this test will need adjustment if a bucket is ever different from
-	 * one page.
+	 * one page.  Also, "initial index size" accounting does not include the
+	 * metapage, nor the first bitmap page.
 	 */
-	if (num_buckets >= (uint32) NBuffers)
+	sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
+	if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
+		sort_threshold = Min(sort_threshold, NBuffers);
+	else
+		sort_threshold = Min(sort_threshold, NLocBuffer);
+
+	if (num_buckets >= (uint32) sort_threshold)
 		buildstate.spool = _h_spoolinit(heap, index, num_buckets);
 	else
 		buildstate.spool = NULL;
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 51b9f3f792..8938ab5b24 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -37,6 +37,7 @@ struct HSpool
 {
 	Tuplesortstate *sortstate;	/* state data for tuplesort.c */
 	Relation	index;
+	uint32		hash_mask;		/* bitmask for hash codes */
 };
 
 
@@ -47,7 +48,6 @@ HSpool *
 _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 {
 	HSpool	   *hspool = (HSpool *) palloc0(sizeof(HSpool));
-	uint32		hash_mask;
 
 	hspool->index = index;
 
@@ -60,7 +60,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 	 * we could just compute num_buckets - 1.  We prefer not to assume that
 	 * here, though.
 	 */
-	hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
+	hspool->hash_mask = (((uint32) 1) << _hash_log2(num_buckets)) - 1;
 
 	/*
 	 * We size the sort area as maintenance_work_mem rather than work_mem to
@@ -69,7 +69,7 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 	 */
 	hspool->sortstate = tuplesort_begin_index_hash(heap,
 												   index,
-												   hash_mask,
+												   hspool->hash_mask,
 												   maintenance_work_mem,
 												   false);
 
@@ -105,12 +105,29 @@ _h_indexbuild(HSpool *hspool)
 {
 	IndexTuple	itup;
 	bool		should_free;
+#ifdef USE_ASSERT_CHECKING
+	uint32		hashkey = 0;
+#endif
 
 	tuplesort_performsort(hspool->sortstate);
 
 	while ((itup = tuplesort_getindextuple(hspool->sortstate,
 										   true, &should_free)) != NULL)
 	{
+		/*
+		 * Technically, it isn't critical that hash keys be found in sorted
+		 * order, since this sorting is only used to increase locality of
+		 * access as a performance optimization.  It still seems like a good
+		 * idea to test tuplesort.c's handling of hash index tuple sorts
+		 * through an assertion, though.
+		 */
+#ifdef USE_ASSERT_CHECKING
+		uint32		lasthashkey = hashkey;
+
+		hashkey = _hash_get_indextuple_hashkey(itup) & hspool->hash_mask;
+		Assert(hashkey >= lasthashkey);
+#endif
+
 		_hash_doinsert(hspool->index, itup);
 		if (should_free)
 			pfree(itup);
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 97cc49e623..0be5cf2dbe 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2346,6 +2346,13 @@ CREATE UNLOGGED TABLE unlogged_hash_table (id int4);
 CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops);
 DROP TABLE unlogged_hash_table;
 -- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
+-- Test hash index build tuplesorting.  Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+WARNING:  hash indexes are not WAL-logged and their use is discouraged
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
 --
 -- Test functional index
 --
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 7c582ea810..7e0bd84ff7 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -690,6 +690,13 @@ DROP TABLE unlogged_hash_table;
 
 -- CREATE INDEX hash_ovfl_index ON hash_ovfl_heap USING hash (x int4_ops);
 
+-- Test hash index build tuplesorting.  Force hash tuplesort using low
+-- maintenance_work_mem setting and fillfactor:
+SET maintenance_work_mem = '1MB';
+CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10);
+DROP INDEX hash_tuplesort_idx;
+RESET maintenance_work_mem;
+
 --
 -- Test functional index
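
For readers following along outside the tree, here is a minimal standalone sketch of the two calculations the patch leans on: the page-count sort threshold derived from maintenance_work_mem and the relevant buffer pool, and the bucket mask taken from the next power of two at or above num_buckets. It then mirrors the USE_ASSERT_CHECKING block: after sorting on masked hash codes, keys must come out non-decreasing. This is an approximation, not backend code; the GUC values, the is_temp flag, and the local hash_log2() helper are illustrative stand-ins for server state and for _hash_log2() in src/backend/access/hash/hashutil.c.

/*
 * Standalone sketch (not backend code) of the patched hashbuild() /
 * _h_spoolinit() / _h_indexbuild() logic.  maintenance_work_mem,
 * NBuffers, NLocBuffer, and hash_log2() are stand-ins for server
 * GUCs and internals.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint32_t uint32;

#define BLCKSZ 8192				/* default PostgreSQL block size */
#define Min(x, y) ((x) < (y) ? (x) : (y))

static const long maintenance_work_mem = 65536;	/* 64MB, expressed in KB */
static const long NBuffers = 16384;		/* shared_buffers, in pages */
static const long NLocBuffer = 1024;	/* temp_buffers, in pages */

/* Stand-in for _hash_log2(): smallest n such that 2^n >= num */
static uint32
hash_log2(uint32 num)
{
	uint32		i,
				limit;

	for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
		;
	return i;
}

/* qsort comparator: order hash codes by their masked (bucket) value */
static uint32 mask_for_cmp;

static int
masked_cmp(const void *a, const void *b)
{
	uint32		ma = *(const uint32 *) a & mask_for_cmp;
	uint32		mb = *(const uint32 *) b & mask_for_cmp;

	return (ma > mb) - (ma < mb);
}

int
main(void)
{
	uint32		num_buckets = 20000;	/* initial buckets, from tuple estimate */
	bool		is_temp = false;	/* index on a temp relation? */
	long		sort_threshold;
	uint32		hash_mask;
	uint32		keys[] = {0xdeadbeef, 0x00001234, 0x8000ffff, 0x7c9a0042};
	size_t		nkeys = sizeof(keys) / sizeof(keys[0]);
	uint32		last = 0;
	size_t		j;

	/* Threshold logic as in the patched hashbuild() */
	sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
	if (!is_temp)
		sort_threshold = Min(sort_threshold, NBuffers);
	else
		sort_threshold = Min(sort_threshold, NLocBuffer);

	printf("sort_threshold = %ld pages => sort path %s\n",
		   sort_threshold,
		   num_buckets >= (uint32) sort_threshold ? "taken" : "skipped");

	/*
	 * Bucket mask as in _h_spoolinit(): AND the hash code with the next
	 * power of two minus one, so sorting by masked hash code groups
	 * tuples by their (expected) initial bucket.
	 */
	hash_mask = (((uint32) 1) << hash_log2(num_buckets)) - 1;
	printf("hash_mask = 0x%08x\n", (unsigned) hash_mask);

	/*
	 * Mirror the USE_ASSERT_CHECKING block in _h_indexbuild(): once the
	 * codes are sorted on their masked value, they must be non-decreasing.
	 */
	mask_for_cmp = hash_mask;
	qsort(keys, nkeys, sizeof(uint32), masked_cmp);
	for (j = 0; j < nkeys; j++)
	{
		assert((keys[j] & hash_mask) >= last);
		last = keys[j] & hash_mask;
	}

	return 0;
}

The point of the mask is that sorting on the masked hash code emits tuples grouped by the bucket they will initially land in, so the _hash_doinsert() loop in _h_indexbuild() walks bucket pages in ascending order rather than jumping around the index, which is exactly the locality benefit the hashbuild() comment describes.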