From 485f0aa85995340fb62113448c992ee48dc6fff1 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Thu, 21 Mar 2024 18:27:49 -0400 Subject: [PATCH] Add hash support functions and hash opclass for contrib/ltree. This also enables hash join and hash aggregation on ltree columns. Tommy Pavlicek, reviewed by jian he Discussion: https://postgr.es/m/CAEhP-W9ZEoHeaP_nKnPCVd_o1c3BAUvq1gWHrq8EbkNRiS9CvQ@mail.gmail.com --- contrib/ltree/Makefile | 3 +- contrib/ltree/expected/ltree.out | 68 ++++++++++++++++++++++++++++++ contrib/ltree/ltree--1.2--1.3.sql | 23 +++++++++++ contrib/ltree/ltree.control | 2 +- contrib/ltree/ltree_op.c | 69 +++++++++++++++++++++++++++++++ contrib/ltree/ltreetest.sql | 1 + contrib/ltree/meson.build | 3 +- contrib/ltree/sql/ltree.sql | 47 +++++++++++++++++++++ doc/src/sgml/ltree.sgml | 8 ++++ 9 files changed, 221 insertions(+), 3 deletions(-) create mode 100644 contrib/ltree/ltree--1.2--1.3.sql diff --git a/contrib/ltree/Makefile b/contrib/ltree/Makefile index 770769a730..e92d971f3d 100644 --- a/contrib/ltree/Makefile +++ b/contrib/ltree/Makefile @@ -14,7 +14,8 @@ OBJS = \ ltxtquery_op.o EXTENSION = ltree -DATA = ltree--1.1--1.2.sql ltree--1.1.sql ltree--1.0--1.1.sql +DATA = ltree--1.2--1.3.sql ltree--1.1--1.2.sql ltree--1.1.sql \ + ltree--1.0--1.1.sql PGFILEDESC = "ltree - hierarchical label data type" HEADERS = ltree.h diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out index 984cd030cf..c8eac3f6b2 100644 --- a/contrib/ltree/expected/ltree.out +++ b/contrib/ltree/expected/ltree.out @@ -1433,8 +1433,27 @@ SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e'; g.b.c.d.e (1 row) +-- Check that the hash_ltree() and hash_ltree_extended() function's lower +-- 32 bits match when the seed is 0 and do not match when the seed != 0 +SELECT v as value, hash_ltree(v)::bit(32) as standard, + hash_ltree_extended(v, 0)::bit(32) as extended0, + hash_ltree_extended(v, 1)::bit(32) as extended1 +FROM (VALUES (NULL::ltree), (''::ltree), 
('0'::ltree), ('0.1'::ltree), + ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) +WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) + OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); + value | standard | extended0 | extended1 +-------+----------+-----------+----------- +(0 rows) + CREATE TABLE ltreetest (t ltree); \copy ltreetest FROM 'data/ltree.data' +SELECT count(*) from ltreetest; + count +------- + 1006 +(1 row) + SELECT * FROM ltreetest WHERE t < '12.3' order by t asc; t ---------------------------------- @@ -7833,6 +7852,55 @@ SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc; (4 rows) drop index tstidx; +--- test hash index +create index tstidx on ltreetest using hash (t); +set enable_seqscan=off; +set enable_bitmapscan=off; +EXPLAIN (COSTS OFF) +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + QUERY PLAN +-------------------------------------- + Index Scan using tstidx on ltreetest + Index Cond: (t = '12.3'::ltree) +(2 rows) + +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + t +------ + 12.3 +(1 row) + +reset enable_seqscan; +reset enable_bitmapscan; +-- test hash aggregate +set enable_hashagg=on; +set enable_sort=off; +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + QUERY PLAN +----------------------------------------------------- + Aggregate + -> HashAggregate + Group Key: ltreetest.t + -> Append + -> Seq Scan on ltreetest + -> Seq Scan on ltreetest ltreetest_1 +(6 rows) + +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + count +------- + 1006 +(1 row) + +reset enable_hashagg; +reset enable_sort; +drop index tstidx; +-- test gist index create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0)); ERROR: value 0 out of bounds for option "siglen" DETAIL: Valid values are between "4" and "2024". 
diff --git a/contrib/ltree/ltree--1.2--1.3.sql b/contrib/ltree/ltree--1.2--1.3.sql new file mode 100644 index 0000000000..bc9a34dd59 --- /dev/null +++ b/contrib/ltree/ltree--1.2--1.3.sql @@ -0,0 +1,23 @@ +/* contrib/ltree/ltree--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ltree UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION hash_ltree(ltree) +RETURNS integer +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE FUNCTION hash_ltree_extended(ltree, bigint) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE; + +CREATE OPERATOR CLASS hash_ltree_ops +DEFAULT FOR TYPE ltree USING hash +AS + OPERATOR 1 = , + FUNCTION 1 hash_ltree(ltree), + FUNCTION 2 hash_ltree_extended(ltree, bigint); + +ALTER OPERATOR =(ltree, ltree) SET (HASHES); diff --git a/contrib/ltree/ltree.control b/contrib/ltree/ltree.control index b408d64781..c2cbeda96c 100644 --- a/contrib/ltree/ltree.control +++ b/contrib/ltree/ltree.control @@ -1,6 +1,6 @@ # ltree extension comment = 'data type for hierarchical tree-like structures' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/ltree' relocatable = true trusted = true diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c index da1db5fcd2..24a21d3ea0 100644 --- a/contrib/ltree/ltree_op.c +++ b/contrib/ltree/ltree_op.c @@ -9,6 +9,7 @@ #include "access/htup_details.h" #include "catalog/pg_statistic.h" +#include "common/hashfn.h" #include "ltree.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -24,6 +25,8 @@ PG_FUNCTION_INFO_V1(ltree_eq); PG_FUNCTION_INFO_V1(ltree_ne); PG_FUNCTION_INFO_V1(ltree_ge); PG_FUNCTION_INFO_V1(ltree_gt); +PG_FUNCTION_INFO_V1(hash_ltree); +PG_FUNCTION_INFO_V1(hash_ltree_extended); PG_FUNCTION_INFO_V1(nlevel); PG_FUNCTION_INFO_V1(ltree_isparent); PG_FUNCTION_INFO_V1(ltree_risparent); @@ -129,6 +132,72 @@ ltree_ne(PG_FUNCTION_ARGS) 
PG_RETURN_BOOL(res != 0); } +/* Compute a 32-bit hash for the ltree from its per-level name hashes */ +Datum +hash_ltree(PG_FUNCTION_ARGS) +{ + ltree *a = PG_GETARG_LTREE_P(0); + uint32 result = 1; + int an = a->numlevel; + ltree_level *al = LTREE_FIRST(a); + + while (an > 0) + { + uint32 levelHash = DatumGetUInt32(hash_any((unsigned char *) al->name, al->len)); + + /* + * Combine the hash values of successive levels: multiply the running + * value by 31, computed as (x << 5) - x, and add the level's hash. + * + * This method is borrowed from hash_array(); see that function for + * further commentary. + */ + result = (result << 5) - result + levelHash; + + an--; + al = LEVEL_NEXT(al); + } + + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT32(result); +} + +/* Compute a 64-bit seeded hash for the ltree (seed is argument 1) */ +Datum +hash_ltree_extended(PG_FUNCTION_ARGS) +{ + ltree *a = PG_GETARG_LTREE_P(0); + const uint64 seed = PG_GETARG_INT64(1); + uint64 result = 1; + int an = a->numlevel; + ltree_level *al = LTREE_FIRST(a); + + /* + * A path with zero levels never enters the loop below, so mix the seed + * in here by returning 1 + seed. With seed 0 the low 32 bits then match + * hash_ltree's result, as required by the hash index support functions, + * while a nonzero seed still yields a different value.
+ */ + if (an == 0) + { + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT64(result + seed); + } + + while (an > 0) + { + uint64 levelHash = DatumGetUInt64(hash_any_extended((unsigned char *) al->name, al->len, seed)); + + result = (result << 5) - result + levelHash; + + an--; + al = LEVEL_NEXT(al); + } + + PG_FREE_IF_COPY(a, 0); + PG_RETURN_UINT64(result); +} + Datum nlevel(PG_FUNCTION_ARGS) { diff --git a/contrib/ltree/ltreetest.sql b/contrib/ltree/ltreetest.sql index d6996caf3c..388d5bb6f5 100644 --- a/contrib/ltree/ltreetest.sql +++ b/contrib/ltree/ltreetest.sql @@ -19,3 +19,4 @@ INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies'); INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts'); CREATE INDEX path_gist_idx ON test USING gist(path); CREATE INDEX path_idx ON test USING btree(path); +CREATE INDEX path_hash_idx ON test USING hash(path); diff --git a/contrib/ltree/meson.build b/contrib/ltree/meson.build index 5862943e39..1ea9603d45 100644 --- a/contrib/ltree/meson.build +++ b/contrib/ltree/meson.build @@ -30,8 +30,9 @@ contrib_targets += ltree install_data( 'ltree.control', 'ltree--1.0--1.1.sql', - 'ltree--1.1--1.2.sql', 'ltree--1.1.sql', + 'ltree--1.1--1.2.sql', + 'ltree--1.2--1.3.sql', kwargs: contrib_data_args, ) diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql index 402096f6c4..dd705d9d7c 100644 --- a/contrib/ltree/sql/ltree.sql +++ b/contrib/ltree/sql/ltree.sql @@ -282,9 +282,21 @@ SELECT ('{3456,1.2.3.4}'::ltree[] ?<@ '1.2.5') is null; SELECT '{ltree.asd, tree.awdfg}'::ltree[] ?@ 'tree & aWdfg@'::ltxtquery; SELECT '{j.k.l.m, g.b.c.d.e}'::ltree[] ?~ 'A*@|g.b.c.d.e'; +-- Check that the hash_ltree() and hash_ltree_extended() function's lower +-- 32 bits match when the seed is 0 and do not match when the seed != 0 +SELECT v as value, hash_ltree(v)::bit(32) as standard, + hash_ltree_extended(v, 0)::bit(32) as extended0, + hash_ltree_extended(v, 1)::bit(32) as extended1 +FROM (VALUES (NULL::ltree),
(''::ltree), ('0'::ltree), ('0.1'::ltree), + ('0.1.2'::ltree), ('0'::ltree), ('0_asd.1_ASD'::ltree)) x(v) +WHERE hash_ltree(v)::bit(32) != hash_ltree_extended(v, 0)::bit(32) + OR hash_ltree(v)::bit(32) = hash_ltree_extended(v, 1)::bit(32); + CREATE TABLE ltreetest (t ltree); \copy ltreetest FROM 'data/ltree.data' +SELECT count(*) from ltreetest; + SELECT * FROM ltreetest WHERE t < '12.3' order by t asc; SELECT * FROM ltreetest WHERE t <= '12.3' order by t asc; SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; @@ -329,6 +341,41 @@ SELECT * FROM ltreetest WHERE t ~ '23.*.2' order by t asc; SELECT * FROM ltreetest WHERE t ? '{23.*.1,23.*.2}' order by t asc; drop index tstidx; + +--- test hash index + +create index tstidx on ltreetest using hash (t); +set enable_seqscan=off; +set enable_bitmapscan=off; + +EXPLAIN (COSTS OFF) +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; +SELECT * FROM ltreetest WHERE t = '12.3' order by t asc; + +reset enable_seqscan; +reset enable_bitmapscan; + +-- test hash aggregate + +set enable_hashagg=on; +set enable_sort=off; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + +SELECT count(*) FROM ( +SELECT t FROM (SELECT * FROM ltreetest UNION ALL SELECT * FROM ltreetest) t1 GROUP BY t +) t2; + +reset enable_hashagg; +reset enable_sort; + +drop index tstidx; + +-- test gist index + create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=0)); create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2025)); create index tstidx on ltreetest using gist (t gist_ltree_ops(siglen=2028)); diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml index 00a6ae70da..9584105b03 100644 --- a/doc/src/sgml/ltree.sgml +++ b/doc/src/sgml/ltree.sgml @@ -623,6 +623,13 @@ Europe & Russia*@ & !Transportation >=, > + + + Hash index over ltree: + = + + + GiST index over ltree (gist_ltree_ops @@ -712,6 +719,7 @@ INSERT INTO 
test VALUES ('Top.Collections.Pictures.Astronomy.Galaxies'); INSERT INTO test VALUES ('Top.Collections.Pictures.Astronomy.Astronauts'); CREATE INDEX path_gist_idx ON test USING GIST (path); CREATE INDEX path_idx ON test USING BTREE (path); +CREATE INDEX path_hash_idx ON test USING HASH (path);