From 612a1ab76724aa1514b6509269342649f8cab375 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 26 Feb 2020 11:28:25 -0800 Subject: [PATCH] Add equalimage B-Tree support functions. Invent the concept of a B-Tree equalimage ("equality implies image equality") support function, registered as support function 4. This indicates whether it is safe (or not safe) to apply optimizations that assume that any two datums considered equal by an operator class's order method must be interchangeable without any loss of semantic information. This is static information about an operator class and a collation. Register an equalimage routine for almost all of the existing B-Tree opclasses. We only need two trivial routines for all of the opclasses that are included with the core distribution. There is one routine for opclasses that index non-collatable types (which returns 'true' unconditionally), plus another routine for collatable types (which returns 'true' when the collation is a deterministic collation). This patch is infrastructure for an upcoming patch that adds B-Tree deduplication. 
Author: Peter Geoghegan, Anastasia Lubennikova Discussion: https://postgr.es/m/CAH2-Wzn3Ee49Gmxb7V1VJ3-AC8fWn-Fr8pfWQebHe8rYRxt5OQ@mail.gmail.com --- doc/src/sgml/btree.sgml | 96 ++++++++++++++++++++- doc/src/sgml/ref/alter_opfamily.sgml | 7 +- doc/src/sgml/ref/create_opclass.sgml | 14 +-- doc/src/sgml/xindex.sgml | 18 +++- src/backend/access/nbtree/nbtutils.c | 73 ++++++++++++++++ src/backend/access/nbtree/nbtvalidate.c | 8 +- src/backend/commands/opclasscmds.c | 30 ++++++- src/backend/utils/adt/datum.c | 26 ++++++ src/backend/utils/adt/varlena.c | 20 +++++ src/bin/pg_dump/t/002_pg_dump.pl | 12 ++- src/include/access/nbtree.h | 8 +- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_amproc.dat | 60 +++++++++++++ src/include/catalog/pg_proc.dat | 6 ++ src/test/regress/expected/alter_generic.out | 8 +- src/test/regress/expected/opr_sanity.out | 36 ++++++++ src/test/regress/sql/alter_generic.sql | 3 + src/test/regress/sql/opr_sanity.sql | 18 ++++ 18 files changed, 418 insertions(+), 27 deletions(-) diff --git a/doc/src/sgml/btree.sgml b/doc/src/sgml/btree.sgml index ac6c4423e6..fcf771c857 100644 --- a/doc/src/sgml/btree.sgml +++ b/doc/src/sgml/btree.sgml @@ -207,7 +207,7 @@ As shown in , btree defines - one required and two optional support functions. The three + one required and three optional support functions. The four user-defined methods are: @@ -456,6 +456,100 @@ returns bool + + equalimage + + + Optionally, a btree operator family may provide + equalimage (equality implies image + equality) support functions, registered under support + function number 4. These functions allow the core code to + determine when it is safe to apply the btree deduplication + optimization. Currently, equalimage + functions are only called when building or rebuilding an index. + + + An equalimage function must have the + signature + +equalimage(opcintype oid) returns bool + + The return value is static information about an operator class + and collation. 
Returning true indicates that + the order function for the operator class is + guaranteed to only return 0 (arguments + are equal) when its A and + B arguments are also interchangeable + without any loss of semantic information. Not registering an + equalimage function or returning + false indicates that this condition cannot be + assumed to hold. + + + The opcintype argument is the + pg_type.oid of the + data type that the operator class indexes. This is a convenience + that allows reuse of the same underlying + equalimage function across operator classes. + If opcintype is a collatable data + type, the appropriate collation OID will be passed to the + equalimage function, using the standard + PG_GET_COLLATION() mechanism. + + + As far as the operator class is concerned, returning + true indicates that deduplication is safe (or + safe for the collation whose OID was passed to its + equalimage function). However, the core + code will only deem deduplication safe for an index when + every indexed column uses an operator class + that registers an equalimage function, and + each function actually returns true when + called. + + + Image equality is almost the same condition + as simple bitwise equality. There is one subtle difference: When + indexing a varlena data type, the on-disk representation of two + image equal datums may not be bitwise equal due to inconsistent + application of TOAST compression on input. + Formally, when an operator class's + equalimage function returns + true, it is safe to assume that the + datum_image_eq() C function will always agree + with the operator class's order function + (provided that the same collation OID is passed to both the + equalimage and order + functions). + + + The core code is fundamentally unable to deduce anything about + the equality implies image equality status of an + operator class within a multiple-data-type family based on + details from other operator classes in the same family. 
Also, it + is not sensible for an operator family to register a cross-type + equalimage function, and attempting to do so + will result in an error. This is because equality implies + image equality status does not just depend on + sorting/equality semantics, which are more or less defined at the + operator family level. In general, the semantics that one + particular data type implements must be considered separately. + + + The convention followed by the operator classes included with the + core PostgreSQL distribution is to + register a stock, generic equalimage + function. Most operator classes register + btequalimage(), which indicates that + deduplication is safe unconditionally. Operator classes for + collatable data types such as text register + btvarstrequalimage(), which indicates that + deduplication is safe with deterministic collations. Best + practice for third-party extensions is to register their own + custom function to retain control. + + + diff --git a/doc/src/sgml/ref/alter_opfamily.sgml b/doc/src/sgml/ref/alter_opfamily.sgml index 848156c9d7..4ac1cca95a 100644 --- a/doc/src/sgml/ref/alter_opfamily.sgml +++ b/doc/src/sgml/ref/alter_opfamily.sgml @@ -153,9 +153,10 @@ ALTER OPERATOR FAMILY name USING op_type since the function's input data type(s) are always the correct ones to use. For B-tree sort - support functions and all functions in GiST, SP-GiST and GIN operator - classes, it is necessary to specify the operand data type(s) the function - is to be used with. + support functions, B-Tree equal image functions, and all + functions in GiST, SP-GiST and GIN operator classes, it is + necessary to specify the operand data type(s) the function is to + be used with. 
diff --git a/doc/src/sgml/ref/create_opclass.sgml b/doc/src/sgml/ref/create_opclass.sgml index dd5252fd97..f42fb6494c 100644 --- a/doc/src/sgml/ref/create_opclass.sgml +++ b/doc/src/sgml/ref/create_opclass.sgml @@ -171,12 +171,14 @@ CREATE OPERATOR CLASS name [ DEFAUL function is intended to support, if different from the input data type(s) of the function (for B-tree comparison functions and hash functions) - or the class's data type (for B-tree sort support functions and all - functions in GiST, SP-GiST, GIN and BRIN operator classes). These defaults - are correct, and so op_type need not be specified in - FUNCTION clauses, except for the case of a B-tree sort - support function that is meant to support cross-data-type comparisons. + or the class's data type (for B-tree sort support functions, + B-tree equal image functions, and all functions in GiST, + SP-GiST, GIN and BRIN operator classes). These defaults are + correct, and so op_type need not be specified + in FUNCTION clauses, except for the case of a + B-tree sort support function that is meant to support + cross-data-type comparisons. diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index ffb5164aaa..2e06ad01bf 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -402,7 +402,7 @@ B-trees require a comparison support function, - and allow two additional support functions to be + and allow three additional support functions to be supplied at the operator class author's option, as shown in . 
The requirements for these support functions are explained further in @@ -441,6 +441,13 @@ 3 + + + Determine if it is safe for indexes that use the operator + class to apply the btree deduplication optimization (optional) + + 4 + @@ -980,7 +987,8 @@ DEFAULT FOR TYPE int8 USING btree FAMILY integer_ops AS OPERATOR 5 > , FUNCTION 1 btint8cmp(int8, int8) , FUNCTION 2 btint8sortsupport(internal) , - FUNCTION 3 in_range(int8, int8, int8, boolean, boolean) ; + FUNCTION 3 in_range(int8, int8, int8, boolean, boolean) , + FUNCTION 4 btequalimage(oid) ; CREATE OPERATOR CLASS int4_ops DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS @@ -992,7 +1000,8 @@ DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS OPERATOR 5 > , FUNCTION 1 btint4cmp(int4, int4) , FUNCTION 2 btint4sortsupport(internal) , - FUNCTION 3 in_range(int4, int4, int4, boolean, boolean) ; + FUNCTION 3 in_range(int4, int4, int4, boolean, boolean) , + FUNCTION 4 btequalimage(oid) ; CREATE OPERATOR CLASS int2_ops DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS @@ -1004,7 +1013,8 @@ DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS OPERATOR 5 > , FUNCTION 1 btint2cmp(int2, int2) , FUNCTION 2 btint2sortsupport(internal) , - FUNCTION 3 in_range(int2, int2, int2, boolean, boolean) ; + FUNCTION 3 in_range(int2, int2, int2, boolean, boolean) , + FUNCTION 4 btequalimage(oid) ; ALTER OPERATOR FAMILY integer_ops USING btree ADD -- cross-type comparisons int8 vs int2 diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 5ab4e712f1..af07732eab 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -20,6 +20,7 @@ #include "access/nbtree.h" #include "access/reloptions.h" #include "access/relscan.h" +#include "catalog/catalog.h" #include "commands/progress.h" #include "lib/qunique.h" #include "miscadmin.h" @@ -2566,3 +2567,75 @@ _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, "or use full text 
indexing."), errtableconstraint(heap, RelationGetRelationName(rel)))); } + +/* + * Are all attributes in rel "equality is image equality" attributes? + * + * We use each attribute's BTEQUALIMAGE_PROC opclass procedure. If any + * opclass either lacks a BTEQUALIMAGE_PROC procedure or returns false, we + * return false; otherwise we return true. + * + * Returned boolean value is stored in index metapage during index builds. + * Deduplication can only be used when we return true. + */ +bool +_bt_allequalimage(Relation rel, bool debugmessage) +{ + bool allequalimage = true; + + /* INCLUDE indexes don't support deduplication */ + if (IndexRelationGetNumberOfAttributes(rel) != + IndexRelationGetNumberOfKeyAttributes(rel)) + return false; + + /* + * There is no special reason why deduplication cannot work with system + * relations (i.e. with system catalog indexes and TOAST indexes). We + * deem deduplication unsafe for these indexes all the same, since the + * alternative is to force users to always use deduplication, without + * being able to opt out. (ALTER INDEX is not supported with system + * indexes, so users would have no way to set the deduplicate_items + * storage parameter to 'off'.) + */ + if (IsSystemRelation(rel)) + return false; + + for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(rel); i++) + { + Oid opfamily = rel->rd_opfamily[i]; + Oid opcintype = rel->rd_opcintype[i]; + Oid collation = rel->rd_indcollation[i]; + Oid equalimageproc; + + equalimageproc = get_opfamily_proc(opfamily, opcintype, opcintype, + BTEQUALIMAGE_PROC); + + /* + * If there is no BTEQUALIMAGE_PROC then deduplication is assumed to + * be unsafe. Otherwise, actually call proc and see what it says. 
+ */ + if (!OidIsValid(equalimageproc) || + !DatumGetBool(OidFunctionCall1Coll(equalimageproc, collation, + ObjectIdGetDatum(opcintype)))) + { + allequalimage = false; + break; + } + } + + /* + * Don't elog() until here to avoid reporting on a system relation index + * or an INCLUDE index + */ + if (debugmessage) + { + if (allequalimage) + elog(DEBUG1, "index \"%s\" can safely use deduplication", + RelationGetRelationName(rel)); + else + elog(DEBUG1, "index \"%s\" cannot use deduplication", + RelationGetRelationName(rel)); + } + + return allequalimage; +} diff --git a/src/backend/access/nbtree/nbtvalidate.c b/src/backend/access/nbtree/nbtvalidate.c index ff634b1649..627f74407a 100644 --- a/src/backend/access/nbtree/nbtvalidate.c +++ b/src/backend/access/nbtree/nbtvalidate.c @@ -104,6 +104,10 @@ btvalidate(Oid opclassoid) procform->amprocrighttype, BOOLOID, BOOLOID); break; + case BTEQUALIMAGE_PROC: + ok = check_amproc_signature(procform->amproc, BOOLOID, true, + 1, 1, OIDOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -211,8 +215,8 @@ btvalidate(Oid opclassoid) /* * Complain if there seems to be an incomplete set of either operators - * or support functions for this datatype pair. The only things - * considered optional are the sortsupport and in_range functions. + * or support functions for this datatype pair. The sortsupport, + * in_range, and equalimage functions are considered optional. */ if (thisgroup->operatorset != ((1 << BTLessStrategyNumber) | diff --git a/src/backend/commands/opclasscmds.c b/src/backend/commands/opclasscmds.c index e2c6de457c..743511bdf2 100644 --- a/src/backend/commands/opclasscmds.c +++ b/src/backend/commands/opclasscmds.c @@ -1143,9 +1143,10 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) /* * btree comparison procs must be 2-arg procs returning int4. btree * sortsupport procs must take internal and return void. btree in_range - * procs must be 5-arg procs returning bool. 
hash support proc 1 must be - * a 1-arg proc returning int4, while proc 2 must be a 2-arg proc - * returning int8. Otherwise we don't know. + * procs must be 5-arg procs returning bool. btree equalimage procs must + * take 1 arg and return bool. hash support proc 1 must be a 1-arg proc + * returning int4, while proc 2 must be a 2-arg proc returning int8. + * Otherwise we don't know. */ if (amoid == BTREE_AM_OID) { @@ -1205,6 +1206,29 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid) if (!OidIsValid(member->righttype)) member->righttype = procform->proargtypes.values[2]; } + else if (member->number == BTEQUALIMAGE_PROC) + { + if (procform->pronargs != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must have one argument"))); + if (procform->prorettype != BOOLOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must return boolean"))); + /* + * pg_amproc functions are indexed by (lefttype, righttype), but + * an equalimage function can only be called at CREATE INDEX time. + * The same opclass opcintype OID is always used for leftype and + * righttype. Providing a cross-type routine isn't sensible. + * Reject cross-type ALTER OPERATOR FAMILY ... ADD FUNCTION 4 + * statements here. 
+ */ + if (member->lefttype != member->righttype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("btree equal image functions must not be cross-type"))); + } } else if (amoid == HASH_AM_OID) { diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c index 4e81947352..34cdde1bb9 100644 --- a/src/backend/utils/adt/datum.c +++ b/src/backend/utils/adt/datum.c @@ -44,6 +44,7 @@ #include "access/detoast.h" #include "fmgr.h" +#include "utils/builtins.h" #include "utils/datum.h" #include "utils/expandeddatum.h" @@ -323,6 +324,31 @@ datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) return result; } +/*------------------------------------------------------------------------- + * btequalimage + * + * Generic "equalimage" support function. + * + * B-Tree operator classes whose equality function could safely be replaced by + * datum_image_eq() in all cases can use this as their "equalimage" support + * function. + * + * Currently, we unconditionally assume that any B-Tree operator class that + * registers btequalimage as its support function 4 must be able to safely use + * optimizations like deduplication (i.e. we return true unconditionally). If + * it ever proved necessary to rescind support for an operator class, we could + * do that in a targeted fashion by doing something with the opcintype + * argument. 
+ *------------------------------------------------------------------------- + */ +Datum +btequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + + PG_RETURN_BOOL(true); +} + /*------------------------------------------------------------------------- * datumEstimateSpace * diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 1b351cbc68..875b02d643 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2783,6 +2783,26 @@ varstr_abbrev_abort(int memtupcount, SortSupport ssup) return true; } +/* + * Generic equalimage support function for character type's operator classes. + * Disables the use of deduplication with nondeterministic collations. + */ +Datum +btvarstrequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + Oid collid = PG_GET_COLLATION(); + + check_collation_set(collid); + + if (lc_collate_is_c(collid) || + collid == DEFAULT_COLLATION_OID || + get_collation_isdeterministic(collid)) + PG_RETURN_BOOL(true); + else + PG_RETURN_BOOL(false); +} + Datum text_larger(PG_FUNCTION_ARGS) { diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 4a9764c2d2..1b90cbd9b5 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -522,7 +522,8 @@ my %tests = ( OPERATOR 4 >=(bigint,int4), OPERATOR 5 >(bigint,int4), FUNCTION 1 (int4, int4) btint4cmp(int4,int4), - FUNCTION 2 (int4, int4) btint4sortsupport(internal);', + FUNCTION 2 (int4, int4) btint4sortsupport(internal), + FUNCTION 4 (int4, int4) btequalimage(oid);', regexp => qr/^ \QALTER OPERATOR FAMILY dump_test.op_family USING btree ADD\E\n\s+ \QOPERATOR 1 <(bigint,integer) ,\E\n\s+ @@ -531,7 +532,8 @@ my %tests = ( \QOPERATOR 4 >=(bigint,integer) ,\E\n\s+ \QOPERATOR 5 >(bigint,integer) ,\E\n\s+ \QFUNCTION 1 (integer, integer) btint4cmp(integer,integer) ,\E\n\s+ - \QFUNCTION 2 (integer, integer) btint4sortsupport(internal);\E + \QFUNCTION 2 (integer, 
integer) btint4sortsupport(internal) ,\E\n\s+ + \QFUNCTION 4 (integer, integer) btequalimage(oid);\E /xm, like => { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, @@ -1554,7 +1556,8 @@ my %tests = ( OPERATOR 4 >=(bigint,bigint), OPERATOR 5 >(bigint,bigint), FUNCTION 1 btint8cmp(bigint,bigint), - FUNCTION 2 btint8sortsupport(internal);', + FUNCTION 2 btint8sortsupport(internal), + FUNCTION 4 btequalimage(oid);', regexp => qr/^ \QCREATE OPERATOR CLASS dump_test.op_class\E\n\s+ \QFOR TYPE bigint USING btree FAMILY dump_test.op_family AS\E\n\s+ @@ -1564,7 +1567,8 @@ my %tests = ( \QOPERATOR 4 >=(bigint,bigint) ,\E\n\s+ \QOPERATOR 5 >(bigint,bigint) ,\E\n\s+ \QFUNCTION 1 (bigint, bigint) btint8cmp(bigint,bigint) ,\E\n\s+ - \QFUNCTION 2 (bigint, bigint) btint8sortsupport(internal);\E + \QFUNCTION 2 (bigint, bigint) btint8sortsupport(internal) ,\E\n\s+ + \QFUNCTION 4 (bigint, bigint) btequalimage(oid);\E /xm, like => { %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 20ace69dab..e8d4d2b55b 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -387,12 +387,17 @@ typedef struct BTMetaPageData * an operator class may choose to offer a third amproc procedure * (BTINRANGE_PROC), independently of whether it offers sortsupport. * For full details, see doc/src/sgml/btree.sgml. + * + * To facilitate B-Tree deduplication, an operator class may choose to + * offer a fourth amproc procedure (BTEQUALIMAGE_PROC). For full details, + * see doc/src/sgml/btree.sgml. 
*/ #define BTORDER_PROC 1 #define BTSORTSUPPORT_PROC 2 #define BTINRANGE_PROC 3 -#define BTNProcs 3 +#define BTEQUALIMAGE_PROC 4 +#define BTNProcs 4 /* * We need to be able to tell the difference between read and write @@ -829,6 +834,7 @@ extern bool _bt_check_natts(Relation rel, bool heapkeyspace, Page page, OffsetNumber offnum); extern void _bt_check_third_page(Relation rel, Relation heap, bool needheaptidspace, Page page, IndexTuple newtup); +extern bool _bt_allequalimage(Relation rel, bool debugmessage); /* * prototypes for functions in nbtvalidate.c diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 2fe64b9d19..1a5e5ce8d1 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202002191 +#define CATALOG_VERSION_NO 202002261 #endif diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index c67768fcab..75c0152b66 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -17,23 +17,36 @@ amprocrighttype => 'anyarray', amprocnum => '1', amproc => 'btarraycmp' }, { amprocfamily => 'btree/bit_ops', amproclefttype => 'bit', amprocrighttype => 'bit', amprocnum => '1', amproc => 'bitcmp' }, +{ amprocfamily => 'btree/bit_ops', amproclefttype => 'bit', + amprocrighttype => 'bit', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/bool_ops', amproclefttype => 'bool', amprocrighttype => 'bool', amprocnum => '1', amproc => 'btboolcmp' }, +{ amprocfamily => 'btree/bool_ops', amproclefttype => 'bool', + amprocrighttype => 'bool', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/bpchar_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '1', amproc => 'bpcharcmp' }, { amprocfamily => 'btree/bpchar_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '2', amproc => 'bpchar_sortsupport' }, +{ 
amprocfamily => 'btree/bpchar_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '4', + amproc => 'btvarstrequalimage' }, { amprocfamily => 'btree/bytea_ops', amproclefttype => 'bytea', amprocrighttype => 'bytea', amprocnum => '1', amproc => 'byteacmp' }, { amprocfamily => 'btree/bytea_ops', amproclefttype => 'bytea', amprocrighttype => 'bytea', amprocnum => '2', amproc => 'bytea_sortsupport' }, +{ amprocfamily => 'btree/bytea_ops', amproclefttype => 'bytea', + amprocrighttype => 'bytea', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/char_ops', amproclefttype => 'char', amprocrighttype => 'char', amprocnum => '1', amproc => 'btcharcmp' }, +{ amprocfamily => 'btree/char_ops', amproclefttype => 'char', + amprocrighttype => 'char', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'date', amprocnum => '1', amproc => 'date_cmp' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'date', amprocnum => '2', amproc => 'date_sortsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', + amprocrighttype => 'date', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'date', amprocrighttype => 'timestamp', amprocnum => '1', amproc => 'date_cmp_timestamp' }, @@ -45,6 +58,8 @@ { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', amprocrighttype => 'timestamp', amprocnum => '2', amproc => 'timestamp_sortsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', + amprocrighttype => 'timestamp', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', amprocrighttype => 'date', amprocnum => '1', amproc => 'timestamp_cmp_date' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamp', @@ -56,6 +71,9 @@ { amprocfamily => 
'btree/datetime_ops', amproclefttype => 'timestamptz', amprocrighttype => 'timestamptz', amprocnum => '2', amproc => 'timestamp_sortsupport' }, +{ amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamptz', + amprocrighttype => 'timestamptz', amprocnum => '4', + amproc => 'btequalimage' }, { amprocfamily => 'btree/datetime_ops', amproclefttype => 'timestamptz', amprocrighttype => 'date', amprocnum => '1', amproc => 'timestamptz_cmp_date' }, @@ -96,10 +114,14 @@ { amprocfamily => 'btree/network_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '2', amproc => 'network_sortsupport' }, +{ amprocfamily => 'btree/network_ops', amproclefttype => 'inet', + amprocrighttype => 'inet', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '1', amproc => 'btint2cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int2', amprocnum => '2', amproc => 'btint2sortsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', + amprocrighttype => 'int2', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', amprocrighttype => 'int4', amprocnum => '1', amproc => 'btint24cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int2', @@ -117,6 +139,8 @@ amprocrighttype => 'int4', amprocnum => '1', amproc => 'btint4cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', amprocrighttype => 'int4', amprocnum => '2', amproc => 'btint4sortsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', + amprocrighttype => 'int4', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', amprocrighttype => 'int8', amprocnum => '1', amproc => 'btint48cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int4', @@ -134,6 +158,8 @@ amprocrighttype => 
'int8', amprocnum => '1', amproc => 'btint8cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', amprocrighttype => 'int8', amprocnum => '2', amproc => 'btint8sortsupport' }, +{ amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', + amprocrighttype => 'int8', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', amprocrighttype => 'int4', amprocnum => '1', amproc => 'btint84cmp' }, { amprocfamily => 'btree/integer_ops', amproclefttype => 'int8', @@ -146,11 +172,15 @@ { amprocfamily => 'btree/interval_ops', amproclefttype => 'interval', amprocrighttype => 'interval', amprocnum => '3', amproc => 'in_range(interval,interval,interval,bool,bool)' }, +{ amprocfamily => 'btree/interval_ops', amproclefttype => 'interval', + amprocrighttype => 'interval', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr', amprocrighttype => 'macaddr', amprocnum => '1', amproc => 'macaddr_cmp' }, { amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr', amprocrighttype => 'macaddr', amprocnum => '2', amproc => 'macaddr_sortsupport' }, +{ amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr', + amprocrighttype => 'macaddr', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/numeric_ops', amproclefttype => 'numeric', amprocrighttype => 'numeric', amprocnum => '1', amproc => 'numeric_cmp' }, { amprocfamily => 'btree/numeric_ops', amproclefttype => 'numeric', @@ -163,62 +193,92 @@ amprocrighttype => 'oid', amprocnum => '1', amproc => 'btoidcmp' }, { amprocfamily => 'btree/oid_ops', amproclefttype => 'oid', amprocrighttype => 'oid', amprocnum => '2', amproc => 'btoidsortsupport' }, +{ amprocfamily => 'btree/oid_ops', amproclefttype => 'oid', + amprocrighttype => 'oid', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/oidvector_ops', amproclefttype => 'oidvector', amprocrighttype => 
'oidvector', amprocnum => '1', amproc => 'btoidvectorcmp' }, +{ amprocfamily => 'btree/oidvector_ops', amproclefttype => 'oidvector', + amprocrighttype => 'oidvector', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '1', amproc => 'bttextcmp' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '2', amproc => 'bttextsortsupport' }, +{ amprocfamily => 'btree/text_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '4', amproc => 'btvarstrequalimage' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'name', amprocrighttype => 'name', amprocnum => '1', amproc => 'btnamecmp' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'name', amprocrighttype => 'name', amprocnum => '2', amproc => 'btnamesortsupport' }, +{ amprocfamily => 'btree/text_ops', amproclefttype => 'name', + amprocrighttype => 'name', amprocnum => '4', amproc => 'btvarstrequalimage' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'name', amprocrighttype => 'text', amprocnum => '1', amproc => 'btnametextcmp' }, { amprocfamily => 'btree/text_ops', amproclefttype => 'text', amprocrighttype => 'name', amprocnum => '1', amproc => 'bttextnamecmp' }, { amprocfamily => 'btree/time_ops', amproclefttype => 'time', amprocrighttype => 'time', amprocnum => '1', amproc => 'time_cmp' }, +{ amprocfamily => 'btree/time_ops', amproclefttype => 'time', + amprocrighttype => 'time', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/time_ops', amproclefttype => 'time', amprocrighttype => 'interval', amprocnum => '3', amproc => 'in_range(time,time,interval,bool,bool)' }, { amprocfamily => 'btree/timetz_ops', amproclefttype => 'timetz', amprocrighttype => 'timetz', amprocnum => '1', amproc => 'timetz_cmp' }, +{ amprocfamily => 'btree/timetz_ops', amproclefttype => 'timetz', + amprocrighttype => 'timetz', amprocnum 
=> '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/timetz_ops', amproclefttype => 'timetz', amprocrighttype => 'interval', amprocnum => '3', amproc => 'in_range(timetz,timetz,interval,bool,bool)' }, { amprocfamily => 'btree/varbit_ops', amproclefttype => 'varbit', amprocrighttype => 'varbit', amprocnum => '1', amproc => 'varbitcmp' }, +{ amprocfamily => 'btree/varbit_ops', amproclefttype => 'varbit', + amprocrighttype => 'varbit', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/text_pattern_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '1', amproc => 'bttext_pattern_cmp' }, { amprocfamily => 'btree/text_pattern_ops', amproclefttype => 'text', amprocrighttype => 'text', amprocnum => '2', amproc => 'bttext_pattern_sortsupport' }, +{ amprocfamily => 'btree/text_pattern_ops', amproclefttype => 'text', + amprocrighttype => 'text', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/bpchar_pattern_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '1', amproc => 'btbpchar_pattern_cmp' }, { amprocfamily => 'btree/bpchar_pattern_ops', amproclefttype => 'bpchar', amprocrighttype => 'bpchar', amprocnum => '2', amproc => 'btbpchar_pattern_sortsupport' }, +{ amprocfamily => 'btree/bpchar_pattern_ops', amproclefttype => 'bpchar', + amprocrighttype => 'bpchar', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/money_ops', amproclefttype => 'money', amprocrighttype => 'money', amprocnum => '1', amproc => 'cash_cmp' }, +{ amprocfamily => 'btree/money_ops', amproclefttype => 'money', + amprocrighttype => 'money', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/tid_ops', amproclefttype => 'tid', amprocrighttype => 'tid', amprocnum => '1', amproc => 'bttidcmp' }, +{ amprocfamily => 'btree/tid_ops', amproclefttype => 'tid', + amprocrighttype => 'tid', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/uuid_ops', 
amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '1', amproc => 'uuid_cmp' }, { amprocfamily => 'btree/uuid_ops', amproclefttype => 'uuid', amprocrighttype => 'uuid', amprocnum => '2', amproc => 'uuid_sortsupport' }, +{ amprocfamily => 'btree/uuid_ops', amproclefttype => 'uuid', + amprocrighttype => 'uuid', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/record_ops', amproclefttype => 'record', amprocrighttype => 'record', amprocnum => '1', amproc => 'btrecordcmp' }, { amprocfamily => 'btree/record_image_ops', amproclefttype => 'record', amprocrighttype => 'record', amprocnum => '1', amproc => 'btrecordimagecmp' }, { amprocfamily => 'btree/pg_lsn_ops', amproclefttype => 'pg_lsn', amprocrighttype => 'pg_lsn', amprocnum => '1', amproc => 'pg_lsn_cmp' }, +{ amprocfamily => 'btree/pg_lsn_ops', amproclefttype => 'pg_lsn', + amprocrighttype => 'pg_lsn', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/macaddr8_ops', amproclefttype => 'macaddr8', amprocrighttype => 'macaddr8', amprocnum => '1', amproc => 'macaddr8_cmp' }, +{ amprocfamily => 'btree/macaddr8_ops', amproclefttype => 'macaddr8', + amprocrighttype => 'macaddr8', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/enum_ops', amproclefttype => 'anyenum', amprocrighttype => 'anyenum', amprocnum => '1', amproc => 'enum_cmp' }, +{ amprocfamily => 'btree/enum_ops', amproclefttype => 'anyenum', + amprocrighttype => 'anyenum', amprocnum => '4', amproc => 'btequalimage' }, { amprocfamily => 'btree/tsvector_ops', amproclefttype => 'tsvector', amprocrighttype => 'tsvector', amprocnum => '1', amproc => 'tsvector_cmp' }, { amprocfamily => 'btree/tsquery_ops', amproclefttype => 'tsquery', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index eb3c1a88d1..07a86c7b7b 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1013,6 +1013,9 @@ { oid => '3255', descr => 'sort support', 
proname => 'bttextsortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'bttextsortsupport' }, +{ oid => '8505', descr => 'equal image', + proname => 'btvarstrequalimage', prorettype => 'bool', proargtypes => 'oid', + prosrc => 'btvarstrequalimage' }, { oid => '377', descr => 'less-equal-greater', proname => 'cash_cmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'money money', prosrc => 'cash_cmp' }, @@ -9483,6 +9486,9 @@ { oid => '3187', descr => 'less-equal-greater based on byte images', proname => 'btrecordimagecmp', prorettype => 'int4', proargtypes => 'record record', prosrc => 'btrecordimagecmp' }, +{ oid => '8506', descr => 'equal image', + proname => 'btequalimage', prorettype => 'bool', proargtypes => 'oid', + prosrc => 'btequalimage' }, # Extensions { oid => '3082', descr => 'list available extensions', diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index ac5183c90e..ba5ce7a17e 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -354,9 +354,9 @@ ERROR: invalid operator number 0, must be between 1 and 5 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD OPERATOR 1 < ; -- operator without argument types ERROR: operator argument types must be specified in ALTER OPERATOR FAMILY ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 0 btint42cmp(int4, int2); -- function number should be between 1 and 5 -ERROR: invalid function number 0, must be between 1 and 3 +ERROR: invalid function number 0, must be between 1 and 4 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD FUNCTION 6 btint42cmp(int4, int2); -- function number should be between 1 and 5 -ERROR: invalid function number 6, must be between 1 and 3 +ERROR: invalid function number 6, must be between 1 and 4 ALTER OPERATOR FAMILY alt_opf4 USING btree ADD STORAGE invalid_storage; -- Ensure STORAGE is not a part of ALTER OPERATOR FAMILY ERROR: STORAGE cannot be 
specified in ALTER OPERATOR FAMILY DROP OPERATOR FAMILY alt_opf4 USING btree; @@ -493,6 +493,10 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD OPERATOR 4 >= (int4, int2) , OPERATOR 5 > (int4, int2) , FUNCTION 1 btint42cmp(int4, int2); +-- Should fail. Not allowed to have cross-type equalimage function. +ALTER OPERATOR FAMILY alt_opf18 USING btree + ADD FUNCTION 4 (int4, int2) btequalimage(oid); +ERROR: btree equal image functions must not be cross-type ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index c19740e5db..fb6c029e3d 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -2111,6 +2111,42 @@ WHERE p1.amproc = p2.oid AND --------------+--------+-------- (0 rows) +-- Almost all of the core distribution's Btree opclasses can use one of the +-- two generic "equalimage" functions as their support function 4. Look for +-- opclasses that don't allow deduplication unconditionally here. +-- +-- Newly added Btree opclasses don't have to support deduplication. It will +-- usually be trivial to add support, though. Note that the expected output +-- of this part of the test will need to be updated when a new opclass cannot +-- support deduplication (by using btequalimage). 
+SELECT amp.amproc::regproc AS proc, opf.opfname AS opfamily_name, + opc.opcname AS opclass_name, opc.opcintype::regtype AS opcintype +FROM pg_am AS am +JOIN pg_opclass AS opc ON opc.opcmethod = am.oid +JOIN pg_opfamily AS opf ON opc.opcfamily = opf.oid +LEFT JOIN pg_amproc AS amp ON amp.amprocfamily = opf.oid AND + amp.amproclefttype = opc.opcintype AND amp.amprocnum = 4 +WHERE am.amname = 'btree' AND + amp.amproc IS DISTINCT FROM 'btequalimage'::regproc +ORDER BY 1, 2, 3; + proc | opfamily_name | opclass_name | opcintype +--------------------+------------------+------------------+------------------ + btvarstrequalimage | bpchar_ops | bpchar_ops | character + btvarstrequalimage | text_ops | name_ops | name + btvarstrequalimage | text_ops | text_ops | text + btvarstrequalimage | text_ops | varchar_ops | text + | array_ops | array_ops | anyarray + | float_ops | float4_ops | real + | float_ops | float8_ops | double precision + | jsonb_ops | jsonb_ops | jsonb + | numeric_ops | numeric_ops | numeric + | range_ops | range_ops | anyrange + | record_image_ops | record_image_ops | record + | record_ops | record_ops | record + | tsquery_ops | tsquery_ops | tsquery + | tsvector_ops | tsvector_ops | tsvector +(14 rows) + -- **************** pg_index **************** -- Look for illegal values in pg_index fields. SELECT p1.indexrelid, p1.indrelid diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index 9eeea2a87e..223d66bc2d 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -430,6 +430,9 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD OPERATOR 4 >= (int4, int2) , OPERATOR 5 > (int4, int2) , FUNCTION 1 btint42cmp(int4, int2); +-- Should fail. Not allowed to have cross-type equalimage function. 
+ALTER OPERATOR FAMILY alt_opf18 USING btree + ADD FUNCTION 4 (int4, int2) btequalimage(oid); ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 624bea46ce..8351b6469a 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -1323,6 +1323,24 @@ WHERE p1.amproc = p2.oid AND p1.amproclefttype != p1.amprocrighttype AND p2.provolatile = 'v'; +-- Almost all of the core distribution's Btree opclasses can use one of the +-- two generic "equalimage" functions as their support function 4. Look for +-- opclasses that don't allow deduplication unconditionally here. +-- +-- Newly added Btree opclasses don't have to support deduplication. It will +-- usually be trivial to add support, though. Note that the expected output +-- of this part of the test will need to be updated when a new opclass cannot +-- support deduplication (by using btequalimage). +SELECT amp.amproc::regproc AS proc, opf.opfname AS opfamily_name, + opc.opcname AS opclass_name, opc.opcintype::regtype AS opcintype +FROM pg_am AS am +JOIN pg_opclass AS opc ON opc.opcmethod = am.oid +JOIN pg_opfamily AS opf ON opc.opcfamily = opf.oid +LEFT JOIN pg_amproc AS amp ON amp.amprocfamily = opf.oid AND + amp.amproclefttype = opc.opcintype AND amp.amprocnum = 4 +WHERE am.amname = 'btree' AND + amp.amproc IS DISTINCT FROM 'btequalimage'::regproc +ORDER BY 1, 2, 3; -- **************** pg_index ****************