1997-04-27 06:03:50 +02:00
|
|
|
--
|
2000-01-06 07:41:55 +01:00
|
|
|
-- BTREE_INDEX
|
2022-02-08 21:30:38 +01:00
|
|
|
--
|
|
|
|
|
|
|
|
-- directory paths are passed to us in environment variables
|
|
|
|
-- psql meta-command: copy the environment variable PG_ABS_SRCDIR into the
-- psql variable abs_srcdir, which the \set filename lines below use to build
-- the data file paths.
\getenv abs_srcdir PG_ABS_SRCDIR
|
|
|
|
|
|
|
|
-- Heap table with an int4 key column (seqno); loaded from desc.data and
-- indexed on seqno by bt_i4_index later in this script.
CREATE TABLE bt_i4_heap (
|
|
|
|
seqno int4,
|
|
|
|
random int4
|
|
|
|
);
|
|
|
|
|
|
|
|
-- Heap table with a name-typed key column (seqno); loaded from hash.data and
-- indexed on seqno by bt_name_index later in this script.
CREATE TABLE bt_name_heap (
|
|
|
|
seqno name,
|
|
|
|
random int4
|
|
|
|
);
|
|
|
|
|
|
|
|
-- Heap table with a text key column (seqno); loaded from desc.data and
-- indexed on seqno by bt_txt_index later in this script.
CREATE TABLE bt_txt_heap (
|
|
|
|
seqno text,
|
|
|
|
random int4
|
|
|
|
);
|
|
|
|
|
|
|
|
-- Heap table with a float8 key column (seqno); loaded from hash.data and
-- indexed on seqno by bt_f8_index later in this script.
CREATE TABLE bt_f8_heap (
|
|
|
|
seqno float8,
|
|
|
|
random int4
|
|
|
|
);
|
|
|
|
|
|
|
|
-- Build the path <abs_srcdir>/data/desc.data in the psql variable filename
-- (\set concatenates its arguments), then load bt_i4_heap from that file.
-- :'filename' interpolates the variable as a quoted SQL literal.
\set filename :abs_srcdir '/data/desc.data'
|
|
|
|
COPY bt_i4_heap FROM :'filename';
|
|
|
|
|
|
|
|
-- Point filename at <abs_srcdir>/data/hash.data and load bt_name_heap from
-- it; :'filename' interpolates the variable as a quoted SQL literal.
\set filename :abs_srcdir '/data/hash.data'
|
|
|
|
COPY bt_name_heap FROM :'filename';
|
|
|
|
|
|
|
|
-- Point filename at <abs_srcdir>/data/desc.data again (the same file used for
-- bt_i4_heap) and load bt_txt_heap from it.
\set filename :abs_srcdir '/data/desc.data'
|
|
|
|
COPY bt_txt_heap FROM :'filename';
|
|
|
|
|
|
|
|
-- Point filename at <abs_srcdir>/data/hash.data again (the same file used for
-- bt_name_heap) and load bt_f8_heap from it.
\set filename :abs_srcdir '/data/hash.data'
|
|
|
|
COPY bt_f8_heap FROM :'filename';
|
|
|
|
|
|
|
|
-- Gather planner statistics for each of the four freshly loaded heap tables.
ANALYZE bt_i4_heap;
|
|
|
|
ANALYZE bt_name_heap;
|
|
|
|
ANALYZE bt_txt_heap;
|
|
|
|
ANALYZE bt_f8_heap;
|
|
|
|
|
|
|
|
--
|
|
|
|
-- BTREE ascending/descending cases
|
|
|
|
--
|
|
|
|
-- we load int4/text from pure descending data (each key is a new
|
|
|
|
-- low key) and name/f8 from pure ascending data (each key is a new
|
|
|
|
-- high key). we had a bug where new low keys would sometimes be
|
|
|
|
-- "lost".
|
|
|
|
--
|
|
|
|
-- One btree index per heap table, each on seqno.  The operator class is
-- spelled out explicitly for every key type (int4_ops, name_ops, text_ops,
-- float8_ops).  The indexes are built after the tables have been loaded.
CREATE INDEX bt_i4_index ON bt_i4_heap USING btree (seqno int4_ops);
|
|
|
|
|
|
|
|
CREATE INDEX bt_name_index ON bt_name_heap USING btree (seqno name_ops);
|
|
|
|
|
|
|
|
CREATE INDEX bt_txt_index ON bt_txt_heap USING btree (seqno text_ops);
|
|
|
|
|
|
|
|
CREATE INDEX bt_f8_index ON bt_f8_heap USING btree (seqno float8_ops);
|
|
|
|
|
|
|
|
--
|
2000-01-06 07:41:55 +01:00
|
|
|
-- test retrieval of min/max keys for each index
|
1997-04-27 06:03:50 +02:00
|
|
|
--
|
|
|
|
|
|
|
|
-- int4 key, range probe below 1.
-- NOTE(review): presumably targets the low end of the key range, per the
-- "min/max keys" section header -- the data file is not visible here.
SELECT b.*
|
|
|
|
FROM bt_i4_heap b
|
|
|
|
WHERE b.seqno < 1;
|
|
|
|
|
|
|
|
-- int4 key, range probe at or above 9999 (presumably the high end of the
-- key range -- confirm against the data file).
SELECT b.*
|
|
|
|
FROM bt_i4_heap b
|
|
|
|
WHERE b.seqno >= 9999;
|
|
|
|
|
|
|
|
-- int4 key, single-value equality probe.
SELECT b.*
|
|
|
|
FROM bt_i4_heap b
|
|
|
|
WHERE b.seqno = 4500;
|
|
|
|
|
|
|
|
-- name key, range probe below '1'.  The literal is cast to name, so the
-- comparison uses name ordering rather than integer ordering.
SELECT b.*
|
1998-04-26 06:12:15 +02:00
|
|
|
FROM bt_name_heap b
|
|
|
|
WHERE b.seqno < '1'::name;
|
1997-04-27 06:03:50 +02:00
|
|
|
|
|
|
|
-- name key, range probe at or above '9999' (again compared as name).
SELECT b.*
|
1998-04-26 06:12:15 +02:00
|
|
|
FROM bt_name_heap b
|
|
|
|
WHERE b.seqno >= '9999'::name;
|
1997-04-27 06:03:50 +02:00
|
|
|
|
|
|
|
-- name key, single-value equality probe.
SELECT b.*
|
1998-04-26 06:12:15 +02:00
|
|
|
FROM bt_name_heap b
|
|
|
|
WHERE b.seqno = '4500'::name;
|
1997-04-27 06:03:50 +02:00
|
|
|
|
|
|
|
-- text key, range probe below '1'.  The literal is cast to text, so the
-- comparison uses text ordering rather than integer ordering.
SELECT b.*
|
|
|
|
FROM bt_txt_heap b
|
|
|
|
WHERE b.seqno < '1'::text;
|
|
|
|
|
|
|
|
-- text key, range probe at or above '9999' (again compared as text).
SELECT b.*
|
|
|
|
FROM bt_txt_heap b
|
|
|
|
WHERE b.seqno >= '9999'::text;
|
|
|
|
|
|
|
|
-- text key, single-value equality probe.
SELECT b.*
|
|
|
|
FROM bt_txt_heap b
|
|
|
|
WHERE b.seqno = '4500'::text;
|
|
|
|
|
|
|
|
-- float8 key, range probe below 1.  The string literal is cast to float8, so
-- the comparison is numeric.
SELECT b.*
|
|
|
|
FROM bt_f8_heap b
|
|
|
|
WHERE b.seqno < '1'::float8;
|
|
|
|
|
|
|
|
-- float8 key, range probe at or above 9999.
SELECT b.*
|
|
|
|
FROM bt_f8_heap b
|
|
|
|
WHERE b.seqno >= '9999'::float8;
|
|
|
|
|
|
|
|
-- float8 key, single-value equality probe.
SELECT b.*
|
|
|
|
FROM bt_f8_heap b
|
|
|
|
WHERE b.seqno = '4500'::float8;
|
|
|
|
|
2008-11-20 20:52:54 +01:00
|
|
|
--
|
|
|
|
-- Check correct optimization of LIKE (special index operator support)
|
|
|
|
-- for both indexscan and bitmapscan cases
|
|
|
|
--
|
|
|
|
|
|
|
|
-- Indexscan case: discourage sequential scans and bitmap scans and leave
-- plain index scans enabled for the LIKE queries that follow.  These settings
-- are undone by the reset statements at the end of this section.
set enable_seqscan to false;
|
|
|
|
set enable_indexscan to true;
|
|
|
|
set enable_bitmapscan to false;
|
2019-02-12 03:26:08 +01:00
|
|
|
-- LIKE with a fixed prefix.  E'RI\\_FKey%del' yields the pattern RI\_FKey%del,
-- in which the backslash makes the underscore a literal character, not a
-- single-character wildcard.  "order by 1" sorts on the only output column,
-- proname.  First show the plan with cost estimates suppressed ...
explain (costs off)
|
2008-11-20 20:52:54 +01:00
|
|
|
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
|
2019-02-12 03:26:08 +01:00
|
|
|
-- ... then run the same query to check its result.
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
|
|
|
|
-- Case-insensitive match whose fixed prefix ('00') consists of digits only.
-- NOTE(review): presumably chosen because digits have no upper/lower case, so
-- the prefix can still drive an index range -- confirm against the expected
-- plan.  Plan first, then the query itself.
explain (costs off)
|
|
|
|
select proname from pg_proc where proname ilike '00%foo' order by 1;
|
|
|
|
select proname from pg_proc where proname ilike '00%foo' order by 1;
|
|
|
|
-- Case-insensitive match whose prefix ('ri') is alphabetic.  Only the plan is
-- displayed here; the query itself is not executed.
explain (costs off)
|
|
|
|
select proname from pg_proc where proname ilike 'ri%foo' order by 1;
|
2008-11-20 20:52:54 +01:00
|
|
|
|
|
|
|
-- Bitmapscan case: swap the two index-related settings so that bitmap scans
-- are enabled and plain index scans are discouraged.  enable_seqscan is not
-- touched here.
set enable_indexscan to false;
|
|
|
|
set enable_bitmapscan to true;
|
2019-02-12 03:26:08 +01:00
|
|
|
-- Same escaped-underscore LIKE query as in the indexscan case, now planned
-- under the bitmapscan settings: plan first, then the result.
explain (costs off)
|
|
|
|
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
|
2008-11-20 20:52:54 +01:00
|
|
|
select proname from pg_proc where proname like E'RI\\_FKey%del' order by 1;
|
2019-02-12 03:26:08 +01:00
|
|
|
-- Same digit-prefixed ILIKE query as in the indexscan case, now planned under
-- the bitmapscan settings: plan first, then the result.
explain (costs off)
|
|
|
|
select proname from pg_proc where proname ilike '00%foo' order by 1;
|
|
|
|
select proname from pg_proc where proname ilike '00%foo' order by 1;
|
|
|
|
-- Alphabetic-prefix ILIKE under the bitmapscan settings.  As in the indexscan
-- case, only the plan is displayed; the query is not executed.
explain (costs off)
|
|
|
|
select proname from pg_proc where proname ilike 'ri%foo' order by 1;
|
|
|
|
|
|
|
|
-- Restore the three planner settings changed for the LIKE tests to their
-- defaults so later tests are not affected.
reset enable_seqscan;
|
|
|
|
reset enable_indexscan;
|
|
|
|
reset enable_bitmapscan;
|
2014-11-19 18:24:58 +01:00
|
|
|
|
2019-11-19 23:03:26 +01:00
|
|
|
-- Also check LIKE optimization with binary-compatible cases
|
|
|
|
|
|
|
|
-- Temporary table with a single text column in the "C" collation.  The index
-- created on it below uses bpchar_ops instead of a text operator class.
create temp table btree_bpchar (f1 text collate "C");
|
Add deduplication to nbtree.
Deduplication reduces the storage overhead of duplicates in indexes that
use the standard nbtree index access method. The deduplication process
is applied lazily, after the point where opportunistic deletion of
LP_DEAD-marked index tuples occurs. Deduplication is only applied at
the point where a leaf page split would otherwise be required. New
posting list tuples are formed by merging together existing duplicate
tuples. The physical representation of the items on an nbtree leaf page
is made more space efficient by deduplication, but the logical contents
of the page are not changed. Even unique indexes make use of
deduplication as a way of controlling bloat from duplicates whose TIDs
point to different versions of the same logical table row.
The lazy approach taken by nbtree has significant advantages over a GIN
style eager approach. Most individual inserts of index tuples have
exactly the same overhead as before. The extra overhead of
deduplication is amortized across insertions, just like the overhead of
page splits. The key space of indexes works in the same way as it has
since commit dd299df8 (the commit that made heap TID a tiebreaker
column).
Testing has shown that nbtree deduplication can generally make indexes
with about 10 or 15 tuples for each distinct key value about 2.5X - 4X
smaller, even with single column integer indexes (e.g., an index on a
referencing column that accompanies a foreign key). The final size of
single column nbtree indexes comes close to the final size of a similar
contrib/btree_gin index, at least in cases where GIN's posting list
compression isn't very effective. This can significantly improve
transaction throughput, and significantly reduce the cost of vacuuming
indexes.
A new index storage parameter (deduplicate_items) controls the use of
deduplication. The default setting is 'on', so all new B-Tree indexes
automatically use deduplication where possible. This decision will be
reviewed at the end of the Postgres 13 beta period.
There is a regression of approximately 2% of transaction throughput with
synthetic workloads that consist of append-only inserts into a table
with several non-unique indexes, where all indexes have few or no
repeated values. The underlying issue is that cycles are wasted on
unsuccessful attempts at deduplicating items in non-unique indexes.
There doesn't seem to be a way around it short of disabling
deduplication entirely. Note that deduplication of items in unique
indexes is fairly well targeted in general, which avoids the problem
there (we can use a special heuristic to trigger deduplication passes in
unique indexes, since we're specifically targeting "version bloat").
Bump XLOG_PAGE_MAGIC because xl_btree_vacuum changed.
No bump in BTREE_VERSION, since the representation of posting list
tuples works in a way that's backwards compatible with version 4 indexes
(i.e. indexes built on PostgreSQL 12). However, users must still
REINDEX a pg_upgrade'd index to use deduplication, regardless of the
Postgres version they've upgraded from. This is the only way to set the
new nbtree metapage flag indicating that deduplication is generally
safe.
Author: Anastasia Lubennikova, Peter Geoghegan
Reviewed-By: Peter Geoghegan, Heikki Linnakangas
Discussion:
https://postgr.es/m/55E4051B.7020209@postgrespro.ru
https://postgr.es/m/4ab6e2db-bcee-f4cf-0916-3a06e6ccbb55@postgrespro.ru
2020-02-26 22:05:30 +01:00
|
|
|
-- Auto-named index on the text column using the bpchar operator class, with
-- the deduplicate_items storage parameter explicitly turned on.
create index on btree_bpchar(f1 bpchar_ops) WITH (deduplicate_items=on);
|
2019-11-19 23:03:26 +01:00
|
|
|
-- Seed rows.  'foo' and 'fool' share the prefix 'foo', which is what the LIKE
-- queries below search for; 'bar' and 'quux' do not.
insert into btree_bpchar values ('foo'), ('fool'), ('bar'), ('quux');
|
|
|
|
-- doesn't match index:
|
|
|
|
-- f1 is used with its declared type (text) in both queries of this group.
-- Wildcard-free pattern: plan, then result.
explain (costs off)
|
|
|
|
select * from btree_bpchar where f1 like 'foo';
|
|
|
|
select * from btree_bpchar where f1 like 'foo';
|
|
|
|
-- Prefix pattern: plan, then result.
explain (costs off)
|
|
|
|
select * from btree_bpchar where f1 like 'foo%';
|
|
|
|
select * from btree_bpchar where f1 like 'foo%';
|
|
|
|
-- these do match the index:
|
|
|
|
-- Same two patterns, but with f1 cast to bpchar so the expression type lines
-- up with the index's bpchar_ops operator class.
-- Wildcard-free pattern: plan, then result.
explain (costs off)
|
|
|
|
select * from btree_bpchar where f1::bpchar like 'foo';
|
|
|
|
select * from btree_bpchar where f1::bpchar like 'foo';
|
|
|
|
-- Prefix pattern: plan, then result.
explain (costs off)
|
|
|
|
select * from btree_bpchar where f1::bpchar like 'foo%';
|
|
|
|
select * from btree_bpchar where f1::bpchar like 'foo%';
|
|
|
|
|
Add deduplication to nbtree.
Deduplication reduces the storage overhead of duplicates in indexes that
use the standard nbtree index access method. The deduplication process
is applied lazily, after the point where opportunistic deletion of
LP_DEAD-marked index tuples occurs. Deduplication is only applied at
the point where a leaf page split would otherwise be required. New
posting list tuples are formed by merging together existing duplicate
tuples. The physical representation of the items on an nbtree leaf page
is made more space efficient by deduplication, but the logical contents
of the page are not changed. Even unique indexes make use of
deduplication as a way of controlling bloat from duplicates whose TIDs
point to different versions of the same logical table row.
The lazy approach taken by nbtree has significant advantages over a GIN
style eager approach. Most individual inserts of index tuples have
exactly the same overhead as before. The extra overhead of
deduplication is amortized across insertions, just like the overhead of
page splits. The key space of indexes works in the same way as it has
since commit dd299df8 (the commit that made heap TID a tiebreaker
column).
Testing has shown that nbtree deduplication can generally make indexes
with about 10 or 15 tuples for each distinct key value about 2.5X - 4X
smaller, even with single column integer indexes (e.g., an index on a
referencing column that accompanies a foreign key). The final size of
single column nbtree indexes comes close to the final size of a similar
contrib/btree_gin index, at least in cases where GIN's posting list
compression isn't very effective. This can significantly improve
transaction throughput, and significantly reduce the cost of vacuuming
indexes.
A new index storage parameter (deduplicate_items) controls the use of
deduplication. The default setting is 'on', so all new B-Tree indexes
automatically use deduplication where possible. This decision will be
reviewed at the end of the Postgres 13 beta period.
There is a regression of approximately 2% of transaction throughput with
synthetic workloads that consist of append-only inserts into a table
with several non-unique indexes, where all indexes have few or no
repeated values. The underlying issue is that cycles are wasted on
unsuccessful attempts at deduplicating items in non-unique indexes.
There doesn't seem to be a way around it short of disabling
deduplication entirely. Note that deduplication of items in unique
indexes is fairly well targeted in general, which avoids the problem
there (we can use a special heuristic to trigger deduplication passes in
unique indexes, since we're specifically targeting "version bloat").
Bump XLOG_PAGE_MAGIC because xl_btree_vacuum changed.
No bump in BTREE_VERSION, since the representation of posting list
tuples works in a way that's backwards compatible with version 4 indexes
(i.e. indexes built on PostgreSQL 12). However, users must still
REINDEX a pg_upgrade'd index to use deduplication, regardless of the
Postgres version they've upgraded from. This is the only way to set the
new nbtree metapage flag indicating that deduplication is generally
safe.
Author: Anastasia Lubennikova, Peter Geoghegan
Reviewed-By: Peter Geoghegan, Heikki Linnakangas
Discussion:
https://postgr.es/m/55E4051B.7020209@postgrespro.ru
https://postgr.es/m/4ab6e2db-bcee-f4cf-0916-3a06e6ccbb55@postgrespro.ru
2020-02-26 22:05:30 +01:00
|
|
|
-- get test coverage for "single value" deduplication strategy:
|
|
|
|
-- Insert 1500 more rows that all carry the same key value 'foo', giving the
-- deduplicating index a long run of duplicates.
insert into btree_bpchar select 'foo' from generate_series(1,1500);
|
|
|
|
|
|
|
|
--
|
|
|
|
-- Perform unique checking, with and without the use of deduplication
|
|
|
|
--
|
|
|
|
-- Single-column table with autovacuum switched off.
-- NOTE(review): presumably so that the dead tuples generated by the DO block
-- below are not cleaned up in the background -- confirm.
CREATE TABLE dedup_unique_test_table (a int) WITH (autovacuum_enabled=false);
|
|
|
|
-- Two unique indexes on the same column that differ only in whether
-- deduplication is enabled, so unique checking runs through both variants.
CREATE UNIQUE INDEX dedup_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=on);
|
|
|
|
CREATE UNIQUE INDEX plain_unique ON dedup_unique_test_table (a) WITH (deduplicate_items=off);
|
|
|
|
-- Generate enough garbage tuples in index to ensure that even the unique index
|
|
|
|
-- with deduplication enabled has to check multiple leaf pages during unique
|
|
|
|
-- checking (at least with a BLCKSZ of 8192 or less)
|
|
|
|
-- Anonymous code block that loops 1350 times.  Each iteration deletes every
-- row (the DELETE is intentionally unfiltered) and then re-inserts the single
-- value 1, so the same key is deleted and re-created on every pass.
-- (No comments are placed inside the $$ ... $$ body because that text is a
-- string literal handed to the server.)
DO $$
|
|
|
|
BEGIN
|
|
|
|
FOR r IN 1..1350 LOOP
|
|
|
|
DELETE FROM dedup_unique_test_table;
|
|
|
|
INSERT INTO dedup_unique_test_table SELECT 1;
|
|
|
|
END LOOP;
|
|
|
|
END$$;
|
|
|
|
|
2020-04-29 01:12:56 +02:00
|
|
|
-- Exercise the LP_DEAD-bit-set tuple deletion code with a posting list tuple.
|
|
|
|
-- The implementation prefers deleting existing items to merging any duplicate
|
|
|
|
-- tuples into a posting list, so we need an explicit test to make sure we get
|
|
|
|
-- coverage (note that this test also assumes BLCKSZ is 8192 or less):
|
|
|
|
-- Drop the non-deduplicating index so that only dedup_unique remains.
DROP INDEX plain_unique;
|
|
|
|
-- Remove the a = 1 row, the value the loop above kept re-inserting.
DELETE FROM dedup_unique_test_table WHERE a = 1;
|
|
|
|
-- Insert 451 distinct values (0 through 450 inclusive).
INSERT INTO dedup_unique_test_table SELECT i FROM generate_series(0,450) i;
|
|
|
|
|
2014-11-19 18:24:58 +01:00
|
|
|
--
|
Make heap TID a tiebreaker nbtree index column.
Make nbtree treat all index tuples as having a heap TID attribute.
Index searches can distinguish duplicates by heap TID, since heap TID is
always guaranteed to be unique. This general approach has numerous
benefits for performance, and is prerequisite to teaching VACUUM to
perform "retail index tuple deletion".
Naively adding a new attribute to every pivot tuple has unacceptable
overhead (it bloats internal pages), so suffix truncation of pivot
tuples is added. This will usually truncate away the "extra" heap TID
attribute from pivot tuples during a leaf page split, and may also
truncate away additional user attributes. This can increase fan-out,
especially in a multi-column index. Truncation can only occur at the
attribute granularity, which isn't particularly effective, but works
well enough for now. A future patch may add support for truncating
"within" text attributes by generating truncated key values using new
opclass infrastructure.
Only new indexes (BTREE_VERSION 4 indexes) will have insertions that
treat heap TID as a tiebreaker attribute, or will have pivot tuples
undergo suffix truncation during a leaf page split (on-disk
compatibility with versions 2 and 3 is preserved). Upgrades to version
4 cannot be performed on-the-fly, unlike upgrades from version 2 to
version 3. contrib/amcheck continues to work with version 2 and 3
indexes, while also enforcing stricter invariants when verifying version
4 indexes. These stricter invariants are the same invariants described
by "3.1.12 Sequencing" from the Lehman and Yao paper.
A later patch will enhance the logic used by nbtree to pick a split
point. This patch is likely to negatively impact performance without
smarter choices around the precise point to split leaf pages at. Making
these two mostly-distinct sets of enhancements into distinct commits
seems like it might clarify their design, even though neither commit is
particularly useful on its own.
The maximum allowed size of new tuples is reduced by an amount equal to
the space required to store an extra MAXALIGN()'d TID in a new high key
during leaf page splits. The user-facing definition of the "1/3 of a
page" restriction is already imprecise, and so does not need to be
revised. However, there should be a compatibility note in the v12
release notes.
Author: Peter Geoghegan
Reviewed-By: Heikki Linnakangas, Alexander Korotkov
Discussion: https://postgr.es/m/CAH2-WzkVb0Kom=R+88fDFb=JSxZMFvbHVC6Mn9LJ2n=X=kS-Uw@mail.gmail.com
2019-03-20 18:04:01 +01:00
|
|
|
-- Test B-tree fast path (cache rightmost leaf page) optimization.
|
2014-11-19 18:24:58 +01:00
|
|
|
--
|
|
|
|
|
Make heap TID a tiebreaker nbtree index column.
Make nbtree treat all index tuples as having a heap TID attribute.
Index searches can distinguish duplicates by heap TID, since heap TID is
always guaranteed to be unique. This general approach has numerous
benefits for performance, and is prerequisite to teaching VACUUM to
perform "retail index tuple deletion".
Naively adding a new attribute to every pivot tuple has unacceptable
overhead (it bloats internal pages), so suffix truncation of pivot
tuples is added. This will usually truncate away the "extra" heap TID
attribute from pivot tuples during a leaf page split, and may also
truncate away additional user attributes. This can increase fan-out,
especially in a multi-column index. Truncation can only occur at the
attribute granularity, which isn't particularly effective, but works
well enough for now. A future patch may add support for truncating
"within" text attributes by generating truncated key values using new
opclass infrastructure.
Only new indexes (BTREE_VERSION 4 indexes) will have insertions that
treat heap TID as a tiebreaker attribute, or will have pivot tuples
undergo suffix truncation during a leaf page split (on-disk
compatibility with versions 2 and 3 is preserved). Upgrades to version
4 cannot be performed on-the-fly, unlike upgrades from version 2 to
version 3. contrib/amcheck continues to work with version 2 and 3
indexes, while also enforcing stricter invariants when verifying version
4 indexes. These stricter invariants are the same invariants described
by "3.1.12 Sequencing" from the Lehman and Yao paper.
A later patch will enhance the logic used by nbtree to pick a split
point. This patch is likely to negatively impact performance without
smarter choices around the precise point to split leaf pages at. Making
these two mostly-distinct sets of enhancements into distinct commits
seems like it might clarify their design, even though neither commit is
particularly useful on its own.
The maximum allowed size of new tuples is reduced by an amount equal to
the space required to store an extra MAXALIGN()'d TID in a new high key
during leaf page splits. The user-facing definition of the "1/3 of a
page" restriction is already imprecise, and so does not need to be
revised. However, there should be a compatibility note in the v12
release notes.
Author: Peter Geoghegan
Reviewed-By: Heikki Linnakangas, Alexander Korotkov
Discussion: https://postgr.es/m/CAH2-WzkVb0Kom=R+88fDFb=JSxZMFvbHVC6Mn9LJ2n=X=kS-Uw@mail.gmail.com
2019-03-20 18:04:01 +01:00
|
|
|
-- First create a tree that's at least three levels deep (i.e. has one level
|
|
|
|
-- between the root and leaf levels). The text inserted is long. It won't be
|
2020-04-29 01:12:56 +02:00
|
|
|
-- TOAST compressed because we use plain storage in the table. Only a few
|
|
|
|
-- index tuples fit on each internal page, allowing us to get a tall tree with
|
|
|
|
-- few pages. (A tall tree is required to trigger caching.)
|
2014-11-19 18:24:58 +01:00
|
|
|
--
|
Make heap TID a tiebreaker nbtree index column.
Make nbtree treat all index tuples as having a heap TID attribute.
Index searches can distinguish duplicates by heap TID, since heap TID is
always guaranteed to be unique. This general approach has numerous
benefits for performance, and is prerequisite to teaching VACUUM to
perform "retail index tuple deletion".
Naively adding a new attribute to every pivot tuple has unacceptable
overhead (it bloats internal pages), so suffix truncation of pivot
tuples is added. This will usually truncate away the "extra" heap TID
attribute from pivot tuples during a leaf page split, and may also
truncate away additional user attributes. This can increase fan-out,
especially in a multi-column index. Truncation can only occur at the
attribute granularity, which isn't particularly effective, but works
well enough for now. A future patch may add support for truncating
"within" text attributes by generating truncated key values using new
opclass infrastructure.
Only new indexes (BTREE_VERSION 4 indexes) will have insertions that
treat heap TID as a tiebreaker attribute, or will have pivot tuples
undergo suffix truncation during a leaf page split (on-disk
compatibility with versions 2 and 3 is preserved). Upgrades to version
4 cannot be performed on-the-fly, unlike upgrades from version 2 to
version 3. contrib/amcheck continues to work with version 2 and 3
indexes, while also enforcing stricter invariants when verifying version
4 indexes. These stricter invariants are the same invariants described
by "3.1.12 Sequencing" from the Lehman and Yao paper.
A later patch will enhance the logic used by nbtree to pick a split
point. This patch is likely to negatively impact performance without
smarter choices around the precise point to split leaf pages at. Making
these two mostly-distinct sets of enhancements into distinct commits
seems like it might clarify their design, even though neither commit is
particularly useful on its own.
The maximum allowed size of new tuples is reduced by an amount equal to
the space required to store an extra MAXALIGN()'d TID in a new high key
during leaf page splits. The user-facing definition of the "1/3 of a
page" restriction is already imprecise, and so does not need to be
revised. However, there should be a compatibility note in the v12
release notes.
Author: Peter Geoghegan
Reviewed-By: Heikki Linnakangas, Alexander Korotkov
Discussion: https://postgr.es/m/CAH2-WzkVb0Kom=R+88fDFb=JSxZMFvbHVC6Mn9LJ2n=X=kS-Uw@mail.gmail.com
2019-03-20 18:04:01 +01:00
|
|
|
-- The text column must be the leading column in the index, since suffix
|
|
|
|
-- truncation would otherwise truncate tuples on internal pages, leaving us
|
|
|
|
-- with a short tree.
|
|
|
|
-- Table for the tall-tree test: an int4 id plus a text payload column.
create table btree_tall_tbl(id int4, t text);
|
|
|
|
-- Store t with PLAIN storage.
alter table btree_tall_tbl alter COLUMN t set storage plain;
|
|
|
|
-- Two-column index with the text column leading and a fillfactor of 10.
create index btree_tall_idx on btree_tall_tbl (t, id) with (fillfactor = 10);
|
|
|
|
-- 130 rows: id runs from 1 to 130 and every row carries the same 250-character
-- string of 'x', so the rows differ only in the second index column.
insert into btree_tall_tbl select g, repeat('x', 250)
|
|
|
|
from generate_series(1, 130) g;
|
Skip full index scan during cleanup of B-tree indexes when possible
Vacuum of index consists of two stages: multiple (zero or more) ambulkdelete
calls and one amvacuumcleanup call. When workload on particular table
is append-only, then autovacuum isn't intended to touch this table. However,
user may run vacuum manually in order to fill visibility map and get benefits
of index-only scans. Then ambulkdelete wouldn't be called for indexes
of such table (because no heap tuples were deleted), only amvacuumcleanup would
be called. In this case, amvacuumcleanup would perform full index scan for
two objectives: put recyclable pages into free space map and update index
statistics.
This patch allows btvacuumcleanup to skip full index scan when two conditions
are satisfied: no pages are going to be put into free space map and index
statistics isn't stalled. In order to check first condition, we store
oldest btpo_xact in the meta-page. When it precedes RecentGlobalXmin, then
there are some recyclable pages. In order to check second condition we store
number of heap tuples observed during previous full index scan by cleanup.
If fraction of newly inserted tuples is less than
vacuum_cleanup_index_scale_factor, then statistics isn't considered to be
stalled. vacuum_cleanup_index_scale_factor can be defined as both reloption and GUC (default).
This patch bumps B-tree meta-page version. Upgrade of meta-page is performed
"on the fly": during VACUUM meta-page is rewritten with new version. No special
handling in pg_upgrade is required.
Author: Masahiko Sawada, Alexander Korotkov
Review by: Peter Geoghegan, Kyotaro Horiguchi, Alexander Korotkov, Yura Sokolov
Discussion: https://www.postgresql.org/message-id/flat/CAD21AoAX+d2oD_nrd9O2YkpzHaFr=uQeGr9s1rKC3O4ENc568g@mail.gmail.com
2018-04-04 18:29:00 +02:00
|
|
|
|
Split up a couple of long-running regression test scripts.
The point of this change is to increase the potential for parallelism
while running the core regression tests. Most people these days are
using parallel testing modes on multi-core machines, so we might as
well try a bit harder to keep multiple cores busy. Hence, a test that
runs much longer than others in its parallel group is a candidate to
be sub-divided.
In this patch, create_index.sql and join.sql are split up.
I haven't changed the content of the tests in any way, just
moved them.
I moved create_index.sql's SP-GiST-related tests into a new script
create_index_spgist, and moved its btree multilevel page deletion test
over to the existing script btree_index. (btree_index is a more natural
home for that test, and it's shorter than others in its parallel group,
so this doesn't hurt total runtime of that group.) There might be
room for more aggressive splitting of create_index, but this is enough
to improve matters considerably.
Likewise, I moved join.sql's "exercises for the hash join code" into
a new file join_hash. Those exercises contributed three-quarters of
the script's runtime. Which might well be excessive ... but for the
moment, I'm satisfied with shoving them into a different parallel
group, where they can share runtime with the roughly-equally-lengthy
gist test.
(Note for anybody following along at home: there are interesting
interactions between the runtimes of create_index and anything running
in parallel with it, because the tests of CREATE INDEX CONCURRENTLY
in that file will repeatedly block waiting for concurrent transactions
to commit. As committed in this patch, create_index and
create_index_spgist have roughly equal runtimes, but that's mostly an
artifact of forced synchronization of the CONCURRENTLY tests; when run
serially, create_index is much faster. A followup patch will reduce
the runtime of create_index_spgist and thereby also create_index.)
Discussion: https://postgr.es/m/735.1554935715@sss.pgh.pa.us
2019-04-11 22:15:54 +02:00
|
|
|
--
|
|
|
|
-- Test for multilevel page deletion
|
|
|
|
--
|
|
|
|
-- Four bigint columns.  Only column a varies (1..80000); b, c and d are the
-- constants 1, 2 and 3 in every row.
CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
|
|
|
|
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
|
|
|
|
-- The four-column primary key is added after the load, so its index is built
-- over the already-populated table.
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
|
|
|
|
-- Delete most entries, and vacuum, deleting internal pages and creating "fast
|
|
|
|
-- root"
|
|
|
|
-- Remove every row with a below 79990, leaving only the 11 rows with
-- a = 79990..80000, then vacuum the table.
DELETE FROM delete_test_table WHERE a < 79990;
|
|
|
|
VACUUM delete_test_table;
|
|
|
|
|
|
|
|
--
|
|
|
|
-- Test B-tree insertion with a metapage update (XLOG_BTREE_INSERT_META
|
|
|
|
-- WAL record type). This happens when a "fast root" page is split. This
|
|
|
|
-- also creates coverage for nbtree FSM page recycling.
|
|
|
|
--
|
|
|
|
-- The vacuum above should've turned the leaf page into a fast root. We just
|
|
|
|
-- need to insert some rows to cause the fast root page to split.
|
|
|
|
-- Re-insert keys 1..1000.  All of them were removed by the earlier
-- DELETE ... WHERE a < 79990, so none collides with the primary key.
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,1000) i;
|
Implement operator class parameters
PostgreSQL provides a set of template index access methods, where opclasses have
much freedom in the semantics of indexing. These index AMs are GiST, GIN,
SP-GiST and BRIN. Their opclasses define the representation of keys, operations on
them and supported search strategies. So, it's natural that opclasses may
face some tradeoffs, which require a user-side decision. This commit implements
opclass parameters allowing users to set some values, which tell opclass how to
index the particular dataset.
This commit doesn't introduce new storage in system catalog. Instead it uses
pg_attribute.attoptions, which is used for table column storage options but
unused for index attributes.
In order to evade changing signature of each opclass support function, we
implement unified way to pass options to opclass support functions. Options
are set to fn_expr as the constant bytea expression. It's possible due to the
fact that opclass support functions are executed outside of expressions, so
fn_expr is unused for them.
This commit comes with some examples of opclass options usage. We parametrize
signature length in GiST. That applies to multiple opclasses: tsvector_ops,
gist__intbig_ops, gist_ltree_ops, gist__ltree_ops, gist_trgm_ops and
gist_hstore_ops. Also we parametrize maximum number of integer ranges for
gist__int_ops. However, the main future usage of this feature is expected
to be json, where users would be able to specify which way to index particular
json parts.
Catversion is bumped.
Discussion: https://postgr.es/m/d22c3a18-31c7-1879-fc11-4c1ce2f5e5af%40postgrespro.ru
Author: Nikita Glukhov, revised by me
Reviewed-by: Nikolay Shaplov, Robert Haas, Tom Lane, Tomas Vondra, Alvaro Herrera
2020-03-30 18:17:11 +02:00
|
|
|
|
|
|
|
-- Test unsupported btree opclass parameters
|
|
|
|
-- Passes an operator class parameter (foo=1) to int4_ops.
-- NOTE(review): going by the section comment ("unsupported btree opclass
-- parameters"), this statement is presumably expected to fail -- confirm
-- against the expected output.
create index on btree_tall_tbl (id int4_ops(foo=1));
|
Block ALTER INDEX/TABLE index_name ALTER COLUMN colname SET (options)
The grammar of this command run on indexes with column names has always
been authorized by the parser, and it has never been documented.
Since 911e702, it is possible to define opclass parameters as of CREATE
INDEX, which actually broke the old case of ALTER INDEX/TABLE where
relation-level parameters n_distinct and n_distinct_inherited could be
defined for an index (see 76a47c0 and its thread where this point has
been touched, still remained unused). Attempting to do that in v13~
would cause the index to become unusable, as there is a new dedicated
code path to load opclass parameters instead of the relation-level ones
previously available. Note that it is possible to fix things with a
manual catalog update to bring the relation back online.
This commit disables this command for now as the use of column names for
indexes does not make sense anyway, particularly when it comes to index
expressions where names are automatically computed. One way to properly
support this case in the future would be to use column numbers
when it comes to indexes, in the same way as ALTER INDEX .. ALTER COLUMN
.. SET STATISTICS.
Partitioned indexes were already blocked, but not indexes. Some tests
are added for both cases.
There was some code in ANALYZE to enforce n_distinct to be used for an
index expression if the parameter was defined, but just remove it for
now until/if there is support for this (note that index-level parameters
never had support in pg_dump either, previously), so this was just dead
code.
Reported-by: Matthijs van der Vleuten
Author: Nathan Bossart, Michael Paquier
Reviewed-by: Vik Fearing, Dilip Kumar
Discussion: https://postgr.es/m/17220-15d684c6c2171a83@postgresql.org
Backpatch-through: 13
2021-10-19 04:03:52 +02:00
|
|
|
|
|
|
|
-- Test case of ALTER INDEX with abuse of column names for indexes.
|
|
|
|
-- This grammar is not officially supported, but the parser allows it.
|
|
|
|
-- Plain (non-partitioned) index case: create a throwaway index, attempt to set
-- the n_distinct option on it by column name via ALTER INDEX, then drop it.
-- NOTE(review): the ALTER INDEX is presumably expected to be rejected --
-- confirm against the expected output.
CREATE INDEX btree_tall_idx2 ON btree_tall_tbl (id);
|
|
|
|
ALTER INDEX btree_tall_idx2 ALTER COLUMN id SET (n_distinct=100);
|
|
|
|
DROP INDEX btree_tall_idx2;
|
|
|
|
-- Partitioned index
|
|
|
|
-- Same ALTER INDEX ... ALTER COLUMN ... SET attempt against an index on a
-- range-partitioned table (no partitions are created).  Dropping the table
-- at the end also removes the index.
-- NOTE(review): the ALTER INDEX is presumably expected to be rejected --
-- confirm against the expected output.
CREATE TABLE btree_part (id int4) PARTITION BY RANGE (id);
|
|
|
|
CREATE INDEX btree_part_idx ON btree_part(id);
|
|
|
|
ALTER INDEX btree_part_idx ALTER COLUMN id SET (n_distinct=100);
|
|
|
|
DROP TABLE btree_part;
|