2017-02-02 20:12:58 +01:00
CREATE TABLE test_hash (a int, b text);
INSERT INTO test_hash VALUES (1, 'one');
CREATE INDEX test_hash_a_idx ON test_hash USING hash (a);
\x
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0));
-[ RECORD 1 ]--+---------
hash_page_type | metapage
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1));
-[ RECORD 1 ]--+-------
hash_page_type | bucket
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2));
-[ RECORD 1 ]--+-------
hash_page_type | bucket
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3));
-[ RECORD 1 ]--+-------
hash_page_type | bucket
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4));
-[ RECORD 1 ]--+-------
hash_page_type | bucket
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5));
-[ RECORD 1 ]--+-------
hash_page_type | bitmap
SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6));
ERROR: block number 6 is out of range for relation "test_hash_a_idx"
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 0
2017-02-02 20:12:58 +01:00
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 1
2017-02-02 20:12:58 +01:00
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 2
2017-02-02 20:12:58 +01:00
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 3
2017-02-02 20:12:58 +01:00
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 4
2017-02-02 20:12:58 +01:00
SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5);
2017-02-09 20:02:58 +01:00
ERROR: invalid overflow block number 5
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 0));
2017-02-02 20:12:58 +01:00
-[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
magic | 105121344
Expand hash indexes more gradually.
Since hash indexes typically have very few overflow pages, adding a
new splitpoint essentially doubles the on-disk size of the index,
which can lead to large and abrupt increases in disk usage (and
perhaps long delays on occasion). To mitigate this problem to some
degree, divide larger splitpoints into four equal phases. This means
that, for example, instead of growing from 4GB to 8GB all at once, a
hash index will now grow from 4GB to 5GB to 6GB to 7GB to 8GB, which
is perhaps still not as smooth as we'd like but certainly an
improvement.
This changes the on-disk format of the metapage, so bump HASH_VERSION
from 2 to 3. This will force a REINDEX of all existing hash indexes,
but that's probably a good idea anyway. First, hash indexes from
pre-10 versions of PostgreSQL could easily be corrupted, and we don't
want to confuse corruption carried over from an older release with any
corruption caused despite the new write-ahead logging in v10. Second,
it will let us remove some backward-compatibility code added by commit
293e24e507838733aba4748b514536af2d39d7f2.
Mithun Cy, reviewed by Amit Kapila, Jesper Pedersen and me. Regression
test outputs updated by me.
Discussion: http://postgr.es/m/CAD__OuhG6F1gQLCgMQNnMNgoCvOLQZz9zKYJQNYvYmmJoM42gA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoYty0jCf-pa+m+vYUJ716+AxM7nv_syvyanyf5O-L_i2A@mail.gmail.com
2017-04-04 05:46:33 +02:00
version | 3
2017-02-02 20:12:58 +01:00
ntuples | 1
bsize | 8152
bmsize | 4096
bmshift | 15
maxbucket | 3
highmask | 7
lowmask | 3
ovflpoint | 2
firstfree | 0
nmaps | 1
procid | 450
Expand hash indexes more gradually.
Since hash indexes typically have very few overflow pages, adding a
new splitpoint essentially doubles the on-disk size of the index,
which can lead to large and abrupt increases in disk usage (and
perhaps long delays on occasion). To mitigate this problem to some
degree, divide larger splitpoints into four equal phases. This means
that, for example, instead of growing from 4GB to 8GB all at once, a
hash index will now grow from 4GB to 5GB to 6GB to 7GB to 8GB, which
is perhaps still not as smooth as we'd like but certainly an
improvement.
This changes the on-disk format of the metapage, so bump HASH_VERSION
from 2 to 3. This will force a REINDEX of all existing hash indexes,
but that's probably a good idea anyway. First, hash indexes from
pre-10 versions of PostgreSQL could easily be corrupted, and we don't
want to confuse corruption carried over from an older release with any
corruption caused despite the new write-ahead logging in v10. Second,
it will let us remove some backward-compatibility code added by commit
293e24e507838733aba4748b514536af2d39d7f2.
Mithun Cy, reviewed by Amit Kapila, Jesper Pedersen and me. Regression
test outputs updated by me.
Discussion: http://postgr.es/m/CAD__OuhG6F1gQLCgMQNnMNgoCvOLQZz9zKYJQNYvYmmJoM42gA@mail.gmail.com
Discussion: http://postgr.es/m/CA+TgmoYty0jCf-pa+m+vYUJ716+AxM7nv_syvyanyf5O-L_i2A@mail.gmail.com
2017-04-04 05:46:33 +02:00
spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
2017-02-02 20:12:58 +01:00
mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 1));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash meta page
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 2));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash meta page
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 3));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash meta page
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 4));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash meta page
2017-02-03 17:06:41 +01:00
SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask,
lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM
hash_metapage_info(get_raw_page('test_hash_a_idx', 5));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash meta page
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 0));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash bucket or overflow page
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 1));
2017-02-02 20:12:58 +01:00
-[ RECORD 1 ]---+-----------
live_items | 0
dead_items | 0
page_size | 8192
Cache hash index's metapage in rel->rd_amcache.
This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan. We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data. To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.
This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code. That decision can be made later, though.
Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
2017-02-07 18:24:25 +01:00
hasho_prevblkno | 3
2017-02-02 20:12:58 +01:00
hasho_nextblkno | 4294967295
hasho_bucket | 0
hasho_flag | 2
hasho_page_id | 65408
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 2));
2017-02-02 20:12:58 +01:00
-[ RECORD 1 ]---+-----------
live_items | 0
dead_items | 0
page_size | 8192
Cache hash index's metapage in rel->rd_amcache.
This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan. We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data. To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.
This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code. That decision can be made later, though.
Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
2017-02-07 18:24:25 +01:00
hasho_prevblkno | 3
2017-02-02 20:12:58 +01:00
hasho_nextblkno | 4294967295
hasho_bucket | 1
hasho_flag | 2
hasho_page_id | 65408
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 3));
2017-02-02 20:12:58 +01:00
-[ RECORD 1 ]---+-----------
live_items | 1
dead_items | 0
page_size | 8192
Cache hash index's metapage in rel->rd_amcache.
This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan. We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data. To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.
This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code. That decision can be made later, though.
Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
2017-02-07 18:24:25 +01:00
hasho_prevblkno | 3
2017-02-02 20:12:58 +01:00
hasho_nextblkno | 4294967295
hasho_bucket | 2
hasho_flag | 2
hasho_page_id | 65408
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 4));
2017-02-02 20:12:58 +01:00
-[ RECORD 1 ]---+-----------
live_items | 0
dead_items | 0
page_size | 8192
Cache hash index's metapage in rel->rd_amcache.
This avoids a very significant amount of buffer manager traffic and
contention when scanning hash indexes, because it's no longer
necessary to lock and pin the metapage for every scan. We do need
some way of figuring out when the cache is too stale to use any more,
so that when we lock the primary bucket page to which the cached
metapage points us, we can tell whether a split has occurred since we
cached the metapage data. To do that, we use the hash_prevblkno field
in the primary bucket page, which would otherwise always be set to
InvalidBuffer.
This patch contains code so that it will continue working (although
less efficiently) with hash indexes built before this change, but
perhaps we should consider bumping the hash version and ripping out
the compatibility code. That decision can be made later, though.
Mithun Cy, reviewed by Jesper Pedersen, Amit Kapila, and by me.
Before committing, I made a number of cosmetic changes to the last
posted version of the patch, adjusted _hash_getcachedmetap to be more
careful about order of operation, and made some necessary updates to
the pageinspect documentation and regression tests.
2017-02-07 18:24:25 +01:00
hasho_prevblkno | 3
2017-02-02 20:12:58 +01:00
hasho_nextblkno | 4294967295
hasho_bucket | 3
hasho_flag | 2
hasho_page_id | 65408
2017-02-03 17:06:41 +01:00
SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno,
hasho_bucket, hasho_flag, hasho_page_id FROM
hash_page_stats(get_raw_page('test_hash_a_idx', 5));
2017-02-02 20:12:58 +01:00
ERROR: page is not a hash bucket or overflow page
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0));
ERROR: page is not a hash bucket or overflow page
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1));
(0 rows)
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2));
(0 rows)
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3));
-[ RECORD 1 ]----------
itemoffset | 1
ctid | (0,1)
data | 2389907270
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4));
(0 rows)
SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5));
ERROR: page is not a hash bucket or overflow page
DROP TABLE test_hash;