diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 73764f2bed..0809a6d2e9 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -8434,8 +8434,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple shows the operators available for the cidr and inet types. The operators <<, - <<=, >>, and - >>= test for subnet inclusion. They + <<=, >>, + >>=, and && + test for subnet inclusion. They consider only the network parts of the two addresses (ignoring any host part) and determine whether one network is identical to or a subnet of the other. @@ -8484,12 +8485,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple << - is contained within + is contained by inet '192.168.1.5' << inet '192.168.1/24' <<= - is contained within or equals + is contained by or equals inet '192.168.1/24' <<= inet '192.168.1/24' @@ -8502,6 +8503,11 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple contains or equals inet '192.168.1/24' >>= inet '192.168.1/24' + + && + contains or is contained by + inet '192.168.1/24' && inet '192.168.1.80/28' + ~ bitwise NOT diff --git a/doc/src/sgml/gin.sgml b/doc/src/sgml/gin.sgml index 561608f8fa..576ad3005a 100644 --- a/doc/src/sgml/gin.sgml +++ b/doc/src/sgml/gin.sgml @@ -62,6 +62,365 @@ + + Built-in Operator Classes + + + The core PostgreSQL distribution + includes the GIN operator classes shown in + . + (Some of the optional modules described in + provide additional GIN operator classes.) 
+ + + + Built-in <acronym>GIN</acronym> Operator Classes + + + + Name + Indexed Data Type + Indexable Operators + + + + + _abstime_ops + abstime[] + + && + <@ + = + @> + + + + _bit_ops + bit[] + + && + <@ + = + @> + + + + _bool_ops + boolean[] + + && + <@ + = + @> + + + + _bpchar_ops + character[] + + && + <@ + = + @> + + + + _bytea_ops + bytea[] + + && + <@ + = + @> + + + + _char_ops + "char"[] + + && + <@ + = + @> + + + + _cidr_ops + cidr[] + + && + <@ + = + @> + + + + _date_ops + date[] + + && + <@ + = + @> + + + + _float4_ops + float4[] + + && + <@ + = + @> + + + + _float8_ops + float8[] + + && + <@ + = + @> + + + + _inet_ops + inet[] + + && + <@ + = + @> + + + + _int2_ops + smallint[] + + && + <@ + = + @> + + + + _int4_ops + integer[] + + && + <@ + = + @> + + + + _int8_ops + bigint[] + + && + <@ + = + @> + + + + _interval_ops + interval[] + + && + <@ + = + @> + + + + _macaddr_ops + macaddr[] + + && + <@ + = + @> + + + + _money_ops + money[] + + && + <@ + = + @> + + + + _name_ops + name[] + + && + <@ + = + @> + + + + _numeric_ops + numeric[] + + && + <@ + = + @> + + + + _oid_ops + oid[] + + && + <@ + = + @> + + + + _oidvector_ops + oidvector[] + + && + <@ + = + @> + + + + _reltime_ops + reltime[] + + && + <@ + = + @> + + + + _text_ops + text[] + + && + <@ + = + @> + + + + _time_ops + time[] + + && + <@ + = + @> + + + + _timestamp_ops + timestamp[] + + && + <@ + = + @> + + + + _timestamptz_ops + timestamp with time zone[] + + && + <@ + = + @> + + + + _timetz_ops + time with time zone[] + + && + <@ + = + @> + + + + _tinterval_ops + tinterval[] + + && + <@ + = + @> + + + + _varbit_ops + bit varying[] + + && + <@ + = + @> + + + + _varchar_ops + character varying[] + + && + <@ + = + @> + + + + jsonb_ops + jsonb + + ? + ?& + ?| + @> + + + + jsonb_hash_ops + jsonb + + @> + + + + tsvector_ops + tsvector + + @@ + @@@ + + + + +
+ + + Of the two operator classes for type jsonb, jsonb_ops + is the default. jsonb_hash_ops supports fewer operators but + will work with larger indexed values than jsonb_ops can support. + + +
+ Extensibility diff --git a/doc/src/sgml/gist.sgml b/doc/src/sgml/gist.sgml index ed0bc54f52..0158b1759e 100644 --- a/doc/src/sgml/gist.sgml +++ b/doc/src/sgml/gist.sgml @@ -40,6 +40,184 @@ + + Built-in Operator Classes + + + The core PostgreSQL distribution + includes the GiST operator classes shown in + . + (Some of the optional modules described in + provide additional GiST operator classes.) + + + + Built-in <acronym>GiST</acronym> Operator Classes + + + + Name + Indexed Data Type + Indexable Operators + Ordering Operators + + + + + box_ops + box + + && + &> + &< + &<| + >> + << + <<| + <@ + @> + @ + |&> + |>> + ~ + ~= + + + + + + circle_ops + circle + + && + &> + &< + &<| + >> + << + <<| + <@ + @> + @ + |&> + |>> + ~ + ~= + + + + + + inet_ops + inet, cidr + + && + >> + >>= + > + >= + <> + << + <<= + < + <= + = + + + + + + point_ops + point + + >> + >^ + << + <@ + <@ + <@ + <^ + ~= + + + <-> + + + + poly_ops + polygon + + && + &> + &< + &<| + >> + << + <<| + <@ + @> + @ + |&> + |>> + ~ + ~= + + + + + + range_ops + any range type + + && + &> + &< + >> + << + <@ + -|- + = + @> + @> + + + + + + tsquery_ops + tsquery + + <@ + @> + + + + + + tsvector_ops + tsvector + + @@ + + + + + + +
+ + + For historical reasons, the inet_ops operator class is + not the default class for types inet and cidr. + To use it, mention the class name in CREATE INDEX, + for example + +CREATE INDEX ON my_table USING gist (my_inet_column inet_ops); + + + +
+ Extensibility diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index b1c8f22718..64530a11c8 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -239,6 +239,8 @@ CREATE INDEX name ON table (See for the meaning of these operators.) + The GiST operator classes included in the standard distribution are + documented in . Many other GiST operator classes are available in the contrib collection or as separate projects. For more information see . @@ -253,6 +255,8 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10; which finds the ten places closest to a given target point. The ability to do this is again dependent on the particular operator class being used. + In , operators that can be + used in this way are listed in the column Ordering Operators. @@ -283,6 +287,8 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10; (See for the meaning of these operators.) + The SP-GiST operator classes included in the standard distribution are + documented in . For more information see . @@ -314,6 +320,8 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10; (See for the meaning of these operators.) + The GIN operator classes included in the standard distribution are + documented in . Many other GIN operator classes are available in the contrib collection or as separate projects. For more information see . @@ -1003,7 +1011,9 @@ CREATE INDEX test_index ON test_table (col varchar_pattern_ops); SELECT am.amname AS index_method, - opc.opcname AS opclass_name + opc.opcname AS opclass_name, + opc.opcintype::regtype AS indexed_type, + opc.opcdefault AS is_default FROM pg_am am, pg_opclass opc WHERE opc.opcmethod = am.oid ORDER BY index_method, opclass_name; @@ -1020,6 +1030,22 @@ SELECT am.amname AS index_method, associated with any single class within the family. 
+ + This expanded version of the previous query shows the operator family + each operator class belongs to: + +SELECT am.amname AS index_method, + opc.opcname AS opclass_name, + opf.opfname AS opfamily_name, + opc.opcintype::regtype AS indexed_type, + opc.opcdefault AS is_default + FROM pg_am am, pg_opclass opc, pg_opfamily opf + WHERE opc.opcmethod = am.oid AND + opc.opcfamily = opf.oid + ORDER BY index_method, opclass_name; + + + This query shows all defined operator families and all the operators included in each family: diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml index a043ffb06c..56827e520d 100644 --- a/doc/src/sgml/spgist.sgml +++ b/doc/src/sgml/spgist.sgml @@ -53,6 +53,93 @@ + + Built-in Operator Classes + + + The core PostgreSQL distribution + includes the SP-GiST operator classes shown in + . + + + + Built-in <acronym>SP-GiST</acronym> Operator Classes + + + + Name + Indexed Data Type + Indexable Operators + + + + + kd_point_ops + point + + << + <@ + <^ + >> + >^ + ~= + + + + quad_point_ops + point + + << + <@ + <^ + >> + >^ + ~= + + + + range_ops + any range type + + && + &< + &> + -|- + << + <@ + = + >> + @> + + + + text_ops + text + + < + <= + = + > + >= + ~<=~ + ~<~ + ~>=~ + ~>~ + + + + +
+ + + Of the two operator classes for type point, + quad_point_ops is the default. kd_point_ops + supports the same operators but uses a different index data structure which + may offer better performance in some applications. + + +
+ Extensibility diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 6b23069e26..7b4391bba1 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,7 +23,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ geo_ops.o geo_selfuncs.o inet_cidr_ntop.o inet_net_pton.o int.o \ int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \ jsonfuncs.o like.o lockfuncs.o mac.o misc.o nabstime.o name.o \ - network.o numeric.o numutils.o oid.o oracle_compat.o \ + network.o network_gist.o network_selfuncs.o \ + numeric.o numutils.o oid.o oracle_compat.o \ orderedsetaggs.o pg_lzcompress.o pg_locale.o pg_lsn.o \ pgstatfuncs.o pseudotypes.o quote.o rangetypes.o rangetypes_gist.o \ rangetypes_selfuncs.o rangetypes_spgist.o rangetypes_typanalyze.o \ diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index 5e837fab2d..8bdf5778d8 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -23,58 +23,9 @@ static int32 network_cmp_internal(inet *a1, inet *a2); -static int bitncmp(void *l, void *r, int n); static bool addressOK(unsigned char *a, int bits, int family); -static int ip_addrsize(inet *inetptr); static inet *internal_inetpl(inet *ip, int64 addend); -/* - * Access macros. We use VARDATA_ANY so that we can process short-header - * varlena values without detoasting them. This requires a trick: - * VARDATA_ANY assumes the varlena header is already filled in, which is - * not the case when constructing a new value (until SET_INET_VARSIZE is - * called, which we typically can't do till the end). Therefore, we - * always initialize the newly-allocated value to zeroes (using palloc0). - * A zero length word will look like the not-1-byte case to VARDATA_ANY, - * and so we correctly construct an uncompressed value. - * - * Note that ip_maxbits() and SET_INET_VARSIZE() require - * the family field to be set correctly. 
- */ - -#define ip_family(inetptr) \ - (((inet_struct *) VARDATA_ANY(inetptr))->family) - -#define ip_bits(inetptr) \ - (((inet_struct *) VARDATA_ANY(inetptr))->bits) - -#define ip_addr(inetptr) \ - (((inet_struct *) VARDATA_ANY(inetptr))->ipaddr) - -#define ip_maxbits(inetptr) \ - (ip_family(inetptr) == PGSQL_AF_INET ? 32 : 128) - -#define SET_INET_VARSIZE(dst) \ - SET_VARSIZE(dst, VARHDRSZ + offsetof(inet_struct, ipaddr) + \ - ip_addrsize(dst)) - - -/* - * Return the number of bytes of address storage needed for this data type. - */ -static int -ip_addrsize(inet *inetptr) -{ - switch (ip_family(inetptr)) - { - case PGSQL_AF_INET: - return 4; - case PGSQL_AF_INET6: - return 16; - default: - return 0; - } -} /* * Common INET/CIDR input routine @@ -596,6 +547,21 @@ network_supeq(PG_FUNCTION_ARGS) PG_RETURN_BOOL(false); } +Datum +network_overlap(PG_FUNCTION_ARGS) +{ + inet *a1 = PG_GETARG_INET_PP(0); + inet *a2 = PG_GETARG_INET_PP(1); + + if (ip_family(a1) == ip_family(a2)) + { + PG_RETURN_BOOL(bitncmp(ip_addr(a1), ip_addr(a2), + Min(ip_bits(a1), ip_bits(a2))) == 0); + } + + PG_RETURN_BOOL(false); +} + /* * Extract data from a network datatype. */ @@ -962,10 +928,10 @@ convert_network_to_scalar(Datum value, Oid typid) * author: * Paul Vixie (ISC), June 1996 */ -static int -bitncmp(void *l, void *r, int n) +int +bitncmp(const unsigned char *l, const unsigned char *r, int n) { - u_int lb, + unsigned int lb, rb; int x, b; @@ -975,8 +941,8 @@ bitncmp(void *l, void *r, int n) if (x || (n % 8) == 0) return x; - lb = ((const u_char *) l)[b]; - rb = ((const u_char *) r)[b]; + lb = l[b]; + rb = r[b]; for (b = n % 8; b > 0; b--) { if (IS_HIGHBIT_SET(lb) != IS_HIGHBIT_SET(rb)) @@ -991,6 +957,49 @@ bitncmp(void *l, void *r, int n) return 0; } +/* + * bitncommon: compare bit masks l and r, for up to n bits. + * + * Returns the number of leading bits that match (0 to n). 
+ */ +int +bitncommon(const unsigned char *l, const unsigned char *r, int n) +{ + int byte, + nbits; + + /* number of bits to examine in last byte */ + nbits = n % 8; + + /* check whole bytes */ + for (byte = 0; byte < n / 8; byte++) + { + if (l[byte] != r[byte]) + { + /* at least one bit in the last byte is not common */ + nbits = 7; + break; + } + } + + /* check bits in last partial byte */ + if (nbits != 0) + { + /* calculate diff of first non-matching bytes */ + unsigned int diff = l[byte] ^ r[byte]; + + /* compare the bits from the most to the least */ + while ((diff >> (8 - nbits)) != 0) + nbits--; + } + + return (8 * byte) + nbits; +} + + +/* + * Verify a CIDR address is OK (doesn't have bits set past the masklen) + */ static bool addressOK(unsigned char *a, int bits, int family) { diff --git a/src/backend/utils/adt/network_gist.c b/src/backend/utils/adt/network_gist.c new file mode 100644 index 0000000000..0a826ae90a --- /dev/null +++ b/src/backend/utils/adt/network_gist.c @@ -0,0 +1,789 @@ +/*------------------------------------------------------------------------- + * + * network_gist.c + * GiST support for network types. + * + * The key thing to understand about this code is the definition of the + * "union" of a set of INET/CIDR values. It works like this: + * 1. If the values are not all of the same IP address family, the "union" + * is a dummy value with family number zero, minbits zero, commonbits zero, + * address all zeroes. Otherwise: + * 2. The union has the common IP address family number. + * 3. The union's minbits value is the smallest netmask length ("ip_bits") + * of all the input values. + * 4. Let C be the number of leading address bits that are in common among + * all the input values (C ranges from 0 to ip_maxbits for the family). + * 5. The union's commonbits value is C. + * 6. The union's address value is the same as the common prefix for its + * first C bits, and is zeroes to the right of that. 
The physical width + * of the address value is ip_maxbits for the address family. + * + * In a leaf index entry (representing a single key), commonbits is equal to + * ip_maxbits for the address family, minbits is the same as the represented + * value's ip_bits, and the address is equal to the represented address. + * Although it may appear that we're wasting a byte by storing the union + * format and not just the represented INET/CIDR value in leaf keys, the + * extra byte is actually "free" because of alignment considerations. + * + * Note that this design tracks minbits and commonbits independently; in any + * given union value, either might be smaller than the other. This does not + * help us much when descending the tree, because of the way inet comparison + * is defined: at non-leaf nodes we can't compare more than minbits bits + * even if we know them. However, it greatly improves the quality of split + * decisions. Preliminary testing suggests that searches are as much as + * twice as fast as for a simpler design in which a single field doubles as + * the common prefix length and the minimum ip_bits value. + * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/network_gist.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <sys/socket.h> + +#include "access/gist.h" +#include "access/skey.h" +#include "utils/inet.h" + +/* + * Operator strategy numbers used in the GiST inet_ops opclass + */ +#define INETSTRAT_OVERLAPS 3 +#define INETSTRAT_EQ 18 +#define INETSTRAT_NE 19 +#define INETSTRAT_LT 20 +#define INETSTRAT_LE 21 +#define INETSTRAT_GT 22 +#define INETSTRAT_GE 23 +#define INETSTRAT_SUB 24 +#define INETSTRAT_SUBEQ 25 +#define INETSTRAT_SUP 26 +#define INETSTRAT_SUPEQ 27 + + +/* + * Representation of a GiST INET/CIDR index key.
This is not identical to + * INET/CIDR because we need to keep track of the length of the common address + * prefix as well as the minimum netmask length. However, as long as it + * follows varlena header rules, the core GiST code won't know the difference. + * For simplicity we always use 1-byte-header varlena format. + */ +typedef struct GistInetKey +{ + uint8 va_header; /* varlena header --- don't touch directly */ + unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */ + unsigned char minbits; /* minimum number of bits in netmask */ + unsigned char commonbits; /* number of common prefix bits in addresses */ + unsigned char ipaddr[16]; /* up to 128 bits of common address */ +} GistInetKey; + +#define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X)) +#define InetKeyPGetDatum(X) PointerGetDatum(X) + +/* + * Access macros; not really exciting, but we use these for notational + * consistency with access to INET/CIDR values. Note that family-zero values + * are stored with 4 bytes of address, not 16. + */ +#define gk_ip_family(gkptr) ((gkptr)->family) +#define gk_ip_minbits(gkptr) ((gkptr)->minbits) +#define gk_ip_commonbits(gkptr) ((gkptr)->commonbits) +#define gk_ip_addr(gkptr) ((gkptr)->ipaddr) +#define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32) + +/* These require that the family field has been set: */ +#define gk_ip_addrsize(gkptr) \ + (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 
16 : 4) +#define gk_ip_maxbits(gkptr) \ + ip_family_maxbits(gk_ip_family(gkptr)) +#define SET_GK_VARSIZE(dst) \ + SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst)) + + +/* + * The GiST query consistency check + */ +Datum +inet_gist_consistent(PG_FUNCTION_ARGS) +{ + GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0); + inet *query = PG_GETARG_INET_PP(1); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + + /* Oid subtype = PG_GETARG_OID(3); */ + bool *recheck = (bool *) PG_GETARG_POINTER(4); + GistInetKey *key = DatumGetInetKeyP(ent->key); + int minbits, + order; + + /* All operators served by this function are exact. */ + *recheck = false; + + /* + * Check 0: different families + * + * If key represents multiple address families, its children could match + * anything. This can only happen on an inner index page. + */ + if (gk_ip_family(key) == 0) + { + Assert(!GIST_LEAF(ent)); + PG_RETURN_BOOL(true); + } + + /* + * Check 1: different families + * + * Matching families do not help any of the strategies. + */ + if (gk_ip_family(key) != ip_family(query)) + { + switch (strategy) + { + case INETSTRAT_LT: + case INETSTRAT_LE: + if (gk_ip_family(key) < ip_family(query)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (gk_ip_family(key) > ip_family(query)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_NE: + PG_RETURN_BOOL(true); + } + /* For all other cases, we can be sure there is no match */ + PG_RETURN_BOOL(false); + } + + /* + * Check 2: network bit count + * + * Network bit count (ip_bits) helps to check leaves for sub network and + * sup network operators. At non-leaf nodes, we know every child value + * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some + * cases too. 
+ */ + switch (strategy) + { + case INETSTRAT_SUB: + if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUBEQ: + if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUPEQ: + case INETSTRAT_EQ: + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_SUP: + if (gk_ip_minbits(key) >= ip_bits(query)) + PG_RETURN_BOOL(false); + break; + } + + /* + * Check 3: common network bits + * + * Compare available common prefix bits to the query, but not beyond + * either the query's netmask or the minimum netmask among the represented + * values. If these bits don't match the query, we have our answer (and + * may or may not need to descend, depending on the operator). If they do + * match, and we are not at a leaf, we descend in all cases. + * + * Note this is the final check for operators that only consider the + * network part of the address. + */ + minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key)); + minbits = Min(minbits, ip_bits(query)); + + order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits); + + switch (strategy) + { + case INETSTRAT_SUB: + case INETSTRAT_SUBEQ: + case INETSTRAT_OVERLAPS: + case INETSTRAT_SUPEQ: + case INETSTRAT_SUP: + PG_RETURN_BOOL(order == 0); + + case INETSTRAT_LT: + case INETSTRAT_LE: + if (order > 0) + PG_RETURN_BOOL(false); + if (order < 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_EQ: + if (order != 0) + PG_RETURN_BOOL(false); + if (!GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (order < 0) + PG_RETURN_BOOL(false); + if (order > 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + + case INETSTRAT_NE: + if (order != 0 || !GIST_LEAF(ent)) + PG_RETURN_BOOL(true); + break; + } + + /* + * Remaining checks are only for leaves and basic comparison strategies. 
+ * See network_cmp_internal() in network.c for the implementation we need + * to match. Note that in a leaf key, commonbits should equal the address + * length, so we compared the whole network parts above. + */ + Assert(GIST_LEAF(ent)); + + /* + * Check 4: network bit count + * + * Next step is to compare netmask widths. + */ + switch (strategy) + { + case INETSTRAT_LT: + case INETSTRAT_LE: + if (gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(true); + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_EQ: + if (gk_ip_minbits(key) != ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_GE: + case INETSTRAT_GT: + if (gk_ip_minbits(key) > ip_bits(query)) + PG_RETURN_BOOL(true); + if (gk_ip_minbits(key) < ip_bits(query)) + PG_RETURN_BOOL(false); + break; + + case INETSTRAT_NE: + if (gk_ip_minbits(key) != ip_bits(query)) + PG_RETURN_BOOL(true); + break; + } + + /* + * Check 5: whole address + * + * Netmask bit counts are the same, so check all the address bits. + */ + order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key)); + + switch (strategy) + { + case INETSTRAT_LT: + PG_RETURN_BOOL(order < 0); + + case INETSTRAT_LE: + PG_RETURN_BOOL(order <= 0); + + case INETSTRAT_EQ: + PG_RETURN_BOOL(order == 0); + + case INETSTRAT_GE: + PG_RETURN_BOOL(order >= 0); + + case INETSTRAT_GT: + PG_RETURN_BOOL(order > 0); + + case INETSTRAT_NE: + PG_RETURN_BOOL(order != 0); + } + + elog(ERROR, "unknown strategy for inet GiST"); + PG_RETURN_BOOL(false); /* keep compiler quiet */ +} + +/* + * Calculate parameters of the union of some GistInetKeys. 
+ * + * Examine the keys in elements m..n inclusive of the GISTENTRY array, + * and compute these output parameters: + * *minfamily_p = minimum IP address family number + * *maxfamily_p = maximum IP address family number + * *minbits_p = minimum netmask width + * *commonbits_p = number of leading bits in common among the addresses + * + * minbits and commonbits are forced to zero if there's more than one + * address family. + */ +static void +calc_inet_union_params(GISTENTRY *ent, + int m, int n, + int *minfamily_p, + int *maxfamily_p, + int *minbits_p, + int *commonbits_p) +{ + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp; + int i; + + /* Must be at least one key. */ + Assert(m <= n); + + /* Initialize variables using the first key. */ + tmp = DatumGetInetKeyP(ent[m].key); + minfamily = maxfamily = gk_ip_family(tmp); + minbits = gk_ip_minbits(tmp); + commonbits = gk_ip_commonbits(tmp); + addr = gk_ip_addr(tmp); + + /* Scan remaining keys. */ + for (i = m + 1; i <= n; i++) + { + tmp = DatumGetInetKeyP(ent[i].key); + + /* Determine range of family numbers */ + if (minfamily > gk_ip_family(tmp)) + minfamily = gk_ip_family(tmp); + if (maxfamily < gk_ip_family(tmp)) + maxfamily = gk_ip_family(tmp); + + /* Find minimum minbits */ + if (minbits > gk_ip_minbits(tmp)) + minbits = gk_ip_minbits(tmp); + + /* Find minimum number of bits in common */ + if (commonbits > gk_ip_commonbits(tmp)) + commonbits = gk_ip_commonbits(tmp); + if (commonbits > 0) + commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits); + } + + /* Force minbits/commonbits to zero if more than one family. */ + if (minfamily != maxfamily) + minbits = commonbits = 0; + + *minfamily_p = minfamily; + *maxfamily_p = maxfamily; + *minbits_p = minbits; + *commonbits_p = commonbits; +} + +/* + * Same as above, but the GISTENTRY elements to examine are those with + * indices listed in the offsets[] array. 
+ */ +static void +calc_inet_union_params_indexed(GISTENTRY *ent, + OffsetNumber *offsets, int noffsets, + int *minfamily_p, + int *maxfamily_p, + int *minbits_p, + int *commonbits_p) +{ + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp; + int i; + + /* Must be at least one key. */ + Assert(noffsets > 0); + + /* Initialize variables using the first key. */ + tmp = DatumGetInetKeyP(ent[offsets[0]].key); + minfamily = maxfamily = gk_ip_family(tmp); + minbits = gk_ip_minbits(tmp); + commonbits = gk_ip_commonbits(tmp); + addr = gk_ip_addr(tmp); + + /* Scan remaining keys. */ + for (i = 1; i < noffsets; i++) + { + tmp = DatumGetInetKeyP(ent[offsets[i]].key); + + /* Determine range of family numbers */ + if (minfamily > gk_ip_family(tmp)) + minfamily = gk_ip_family(tmp); + if (maxfamily < gk_ip_family(tmp)) + maxfamily = gk_ip_family(tmp); + + /* Find minimum minbits */ + if (minbits > gk_ip_minbits(tmp)) + minbits = gk_ip_minbits(tmp); + + /* Find minimum number of bits in common */ + if (commonbits > gk_ip_commonbits(tmp)) + commonbits = gk_ip_commonbits(tmp); + if (commonbits > 0) + commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits); + } + + /* Force minbits/commonbits to zero if more than one family. */ + if (minfamily != maxfamily) + minbits = commonbits = 0; + + *minfamily_p = minfamily; + *maxfamily_p = maxfamily; + *minbits_p = minbits; + *commonbits_p = commonbits; +} + +/* + * Construct a GistInetKey representing a union value. + * + * Inputs are the family/minbits/commonbits values to use, plus a pointer to + * the address field of one of the union inputs. (Since we're going to copy + * just the bits-in-common, it doesn't matter which one.) + */ +static GistInetKey * +build_inet_union_key(int family, int minbits, int commonbits, + unsigned char *addr) +{ + GistInetKey *result; + + /* Make sure any unused bits are zeroed. 
*/ + result = (GistInetKey *) palloc0(sizeof(GistInetKey)); + + gk_ip_family(result) = family; + gk_ip_minbits(result) = minbits; + gk_ip_commonbits(result) = commonbits; + + /* Clone appropriate bytes of the address. */ + if (commonbits > 0) + memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8); + + /* Clean any unwanted bits in the last partial byte. */ + if (commonbits % 8 != 0) + gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8)); + + /* Set varlena header correctly. */ + SET_GK_VARSIZE(result); + + return result; +} + + +/* + * The GiST union function + * + * See comments at head of file for the definition of the union. + */ +Datum +inet_gist_union(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GISTENTRY *ent = entryvec->vector; + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp, + *result; + + /* Determine parameters of the union. */ + calc_inet_union_params(ent, 0, entryvec->n - 1, + &minfamily, &maxfamily, + &minbits, &commonbits); + + /* If more than one family, emit family number zero. */ + if (minfamily != maxfamily) + minfamily = 0; + + /* Initialize address using the first key. */ + tmp = DatumGetInetKeyP(ent[0].key); + addr = gk_ip_addr(tmp); + + /* Construct the union value. */ + result = build_inet_union_key(minfamily, minbits, commonbits, addr); + + PG_RETURN_POINTER(result); +} + +/* + * The GiST compress function + * + * Convert an inet value to GistInetKey. 
+ */ +Datum +inet_gist_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval; + + if (entry->leafkey) + { + retval = palloc(sizeof(GISTENTRY)); + if (DatumGetPointer(entry->key) != NULL) + { + inet *in = DatumGetInetPP(entry->key); + GistInetKey *r; + + r = (GistInetKey *) palloc0(sizeof(GistInetKey)); + + gk_ip_family(r) = ip_family(in); + gk_ip_minbits(r) = ip_bits(in); + gk_ip_commonbits(r) = gk_ip_maxbits(r); + memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r)); + SET_GK_VARSIZE(r); + + gistentryinit(*retval, PointerGetDatum(r), + entry->rel, entry->page, + entry->offset, FALSE); + } + else + { + gistentryinit(*retval, (Datum) 0, + entry->rel, entry->page, + entry->offset, FALSE); + } + } + else + retval = entry; + PG_RETURN_POINTER(retval); +} + +/* + * The GiST decompress function + * + * do not do anything --- we just use the stored GistInetKey as-is. + */ +Datum +inet_gist_decompress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + + PG_RETURN_POINTER(entry); +} + +/* + * The GiST page split penalty function + * + * Charge a large penalty if address family doesn't match, or a somewhat + * smaller one if the new value would degrade the union's minbits + * (minimum netmask width). Otherwise, penalty is inverse of the + * new number of common address bits. 
+ */ +Datum +inet_gist_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + GistInetKey *orig = DatumGetInetKeyP(origent->key), + *new = DatumGetInetKeyP(newent->key); + int commonbits; + + if (gk_ip_family(orig) == gk_ip_family(new)) + { + if (gk_ip_minbits(orig) <= gk_ip_minbits(new)) + { + commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new), + Min(gk_ip_commonbits(orig), + gk_ip_commonbits(new))); + if (commonbits > 0) + *penalty = 1.0f / commonbits; + else + *penalty = 2; + } + else + *penalty = 3; + } + else + *penalty = 4; + + PG_RETURN_POINTER(penalty); +} + +/* + * The GiST PickSplit method + * + * There are two ways to split. First one is to split by address families, + * if there are multiple families appearing in the input. + * + * The second and more common way is to split by addresses. To achieve this, + * determine the number of leading bits shared by all the keys, then split on + * the next bit. (We don't currently consider the netmask widths while doing + * this; should we?) If we fail to get a nontrivial split that way, split + * 50-50. + */ +Datum +inet_gist_picksplit(PG_FUNCTION_ARGS) +{ + GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + GISTENTRY *ent = entryvec->vector; + int minfamily, + maxfamily, + minbits, + commonbits; + unsigned char *addr; + GistInetKey *tmp, + *left_union, + *right_union; + int maxoff, + nbytes; + OffsetNumber i, + *left, + *right; + + maxoff = entryvec->n - 1; + nbytes = (maxoff + 1) * sizeof(OffsetNumber); + + left = (OffsetNumber *) palloc(nbytes); + right = (OffsetNumber *) palloc(nbytes); + + splitvec->spl_left = left; + splitvec->spl_right = right; + + splitvec->spl_nleft = 0; + splitvec->spl_nright = 0; + + /* Determine parameters of the union of all the inputs. 
*/ + calc_inet_union_params(ent, FirstOffsetNumber, maxoff, + &minfamily, &maxfamily, + &minbits, &commonbits); + + if (minfamily != maxfamily) + { + /* Multiple families, so split by family. */ + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + /* + * If there's more than 2 families, all but maxfamily go into the + * left union. This could only happen if the inputs include some + * IPv4, some IPv6, and some already-multiple-family unions. + */ + tmp = DatumGetInetKeyP(ent[i].key); + if (gk_ip_family(tmp) != maxfamily) + left[splitvec->spl_nleft++] = i; + else + right[splitvec->spl_nright++] = i; + } + } + else + { + /* + * Split on the next bit after the common bits. If that yields a + * trivial split, try the next bit position to the right. Repeat till + * success; or if we run out of bits, do an arbitrary 50-50 split. + */ + int maxbits = ip_family_maxbits(minfamily); + + while (commonbits < maxbits) + { + /* Split using the commonbits'th bit position. */ + int bitbyte = commonbits / 8; + int bitmask = 0x80 >> (commonbits % 8); + + splitvec->spl_nleft = splitvec->spl_nright = 0; + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + tmp = DatumGetInetKeyP(ent[i].key); + addr = gk_ip_addr(tmp); + if ((addr[bitbyte] & bitmask) == 0) + left[splitvec->spl_nleft++] = i; + else + right[splitvec->spl_nright++] = i; + } + + if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0) + break; /* success */ + commonbits++; + } + + if (commonbits >= maxbits) + { + /* Failed ... do a 50-50 split. */ + splitvec->spl_nleft = splitvec->spl_nright = 0; + + for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i)) + { + left[splitvec->spl_nleft++] = i; + } + for (; i <= maxoff; i = OffsetNumberNext(i)) + { + right[splitvec->spl_nright++] = i; + } + } + } + + /* + * Compute the union value for each side from scratch. 
In most cases we + * could approximate the union values with what we already know, but this + * ensures that each side has minbits and commonbits set as high as + * possible. + */ + calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft, + &minfamily, &maxfamily, + &minbits, &commonbits); + if (minfamily != maxfamily) + minfamily = 0; + tmp = DatumGetInetKeyP(ent[left[0]].key); + addr = gk_ip_addr(tmp); + left_union = build_inet_union_key(minfamily, minbits, commonbits, addr); + splitvec->spl_ldatum = PointerGetDatum(left_union); + + calc_inet_union_params_indexed(ent, right, splitvec->spl_nright, + &minfamily, &maxfamily, + &minbits, &commonbits); + if (minfamily != maxfamily) + minfamily = 0; + tmp = DatumGetInetKeyP(ent[right[0]].key); + addr = gk_ip_addr(tmp); + right_union = build_inet_union_key(minfamily, minbits, commonbits, addr); + splitvec->spl_rdatum = PointerGetDatum(right_union); + + PG_RETURN_POINTER(splitvec); +} + +/* + * The GiST equality function + */ +Datum +inet_gist_same(PG_FUNCTION_ARGS) +{ + GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0)); + GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1)); + bool *result = (bool *) PG_GETARG_POINTER(2); + + *result = (gk_ip_family(left) == gk_ip_family(right) && + gk_ip_minbits(left) == gk_ip_minbits(right) && + gk_ip_commonbits(left) == gk_ip_commonbits(right) && + memcmp(gk_ip_addr(left), gk_ip_addr(right), + gk_ip_addrsize(left)) == 0); + + PG_RETURN_POINTER(result); +} diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c new file mode 100644 index 0000000000..d0d806f889 --- /dev/null +++ b/src/backend/utils/adt/network_selfuncs.c @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * network_selfuncs.c + * Functions for selectivity estimation of inet/cidr operators + * + * Currently these are just stubs, but we hope to do better soon. 
+ * + * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/network_selfuncs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/inet.h" + + +Datum +networksel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.001); +} + +Datum +networkjoinsel(PG_FUNCTION_ARGS) +{ + PG_RETURN_FLOAT8(0.001); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 4b3357ccd9..fe6144e2d3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201404081 +#define CATALOG_VERSION_NO 201404082 #endif diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 2623113557..8efd3be3c6 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -818,4 +818,19 @@ DATA(insert ( 3474 3831 3831 8 s 3892 4000 0 )); DATA(insert ( 3474 3831 2283 16 s 3889 4000 0 )); DATA(insert ( 3474 3831 3831 18 s 3882 4000 0 )); +/* + * GiST inet_ops + */ +DATA(insert ( 3550 869 869 3 s 3552 783 0 )); +DATA(insert ( 3550 869 869 18 s 1201 783 0 )); +DATA(insert ( 3550 869 869 19 s 1202 783 0 )); +DATA(insert ( 3550 869 869 20 s 1203 783 0 )); +DATA(insert ( 3550 869 869 21 s 1204 783 0 )); +DATA(insert ( 3550 869 869 22 s 1205 783 0 )); +DATA(insert ( 3550 869 869 23 s 1206 783 0 )); +DATA(insert ( 3550 869 869 24 s 931 783 0 )); +DATA(insert ( 3550 869 869 25 s 932 783 0 )); +DATA(insert ( 3550 869 869 26 s 933 783 0 )); +DATA(insert ( 3550 869 869 27 s 934 783 0 )); + #endif /* PG_AMOP_H */ diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index b28dd563a8..198b126964 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -399,6 +399,13 @@ DATA(insert ( 4037 3802 3802 2 3485 
)); DATA(insert ( 4037 3802 3802 3 3486 )); DATA(insert ( 4037 3802 3802 4 3487 )); DATA(insert ( 4037 3802 3802 6 3489 )); +DATA(insert ( 3550 869 869 1 3553 )); +DATA(insert ( 3550 869 869 2 3554 )); +DATA(insert ( 3550 869 869 3 3555 )); +DATA(insert ( 3550 869 869 4 3556 )); +DATA(insert ( 3550 869 869 5 3557 )); +DATA(insert ( 3550 869 869 6 3558 )); +DATA(insert ( 3550 869 869 7 3559 )); /* sp-gist */ DATA(insert ( 3474 3831 3831 1 3469 )); diff --git a/src/include/catalog/pg_opclass.h b/src/include/catalog/pg_opclass.h index 63a40a8412..49b24108de 100644 --- a/src/include/catalog/pg_opclass.h +++ b/src/include/catalog/pg_opclass.h @@ -112,6 +112,7 @@ DATA(insert OID = 3123 ( 403 float8_ops PGNSP PGUID 1970 701 t 0 )); DATA(insert ( 405 float8_ops PGNSP PGUID 1971 701 t 0 )); DATA(insert ( 403 inet_ops PGNSP PGUID 1974 869 t 0 )); DATA(insert ( 405 inet_ops PGNSP PGUID 1975 869 t 0 )); +DATA(insert ( 783 inet_ops PGNSP PGUID 3550 869 f 0 )); DATA(insert OID = 1979 ( 403 int2_ops PGNSP PGUID 1976 21 t 0 )); #define INT2_BTREE_OPS_OID 1979 DATA(insert ( 405 int2_ops PGNSP PGUID 1977 21 t 0 )); diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index ac09034f3d..f280af441c 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -1140,18 +1140,20 @@ DATA(insert OID = 1205 ( ">" PGNSP PGUID b f f 869 869 16 1203 1204 network DESCR("greater than"); DATA(insert OID = 1206 ( ">=" PGNSP PGUID b f f 869 869 16 1204 1203 network_ge scalargtsel scalargtjoinsel )); DESCR("greater than or equal"); -DATA(insert OID = 931 ( "<<" PGNSP PGUID b f f 869 869 16 933 0 network_sub - - )); +DATA(insert OID = 931 ( "<<" PGNSP PGUID b f f 869 869 16 933 0 network_sub networksel networkjoinsel )); DESCR("is subnet"); #define OID_INET_SUB_OP 931 -DATA(insert OID = 932 ( "<<=" PGNSP PGUID b f f 869 869 16 934 0 network_subeq - - )); +DATA(insert OID = 932 ( "<<=" PGNSP PGUID b f f 869 869 16 934 0 network_subeq networksel 
networkjoinsel )); DESCR("is subnet or equal"); #define OID_INET_SUBEQ_OP 932 -DATA(insert OID = 933 ( ">>" PGNSP PGUID b f f 869 869 16 931 0 network_sup - - )); +DATA(insert OID = 933 ( ">>" PGNSP PGUID b f f 869 869 16 931 0 network_sup networksel networkjoinsel )); DESCR("is supernet"); #define OID_INET_SUP_OP 933 -DATA(insert OID = 934 ( ">>=" PGNSP PGUID b f f 869 869 16 932 0 network_supeq - - )); +DATA(insert OID = 934 ( ">>=" PGNSP PGUID b f f 869 869 16 932 0 network_supeq networksel networkjoinsel )); DESCR("is supernet or equal"); #define OID_INET_SUPEQ_OP 934 +DATA(insert OID = 3552 ( "&&" PGNSP PGUID b f f 869 869 16 3552 0 network_overlap networksel networkjoinsel )); +DESCR("overlaps (is subnet or supernet)"); DATA(insert OID = 2634 ( "~" PGNSP PGUID l f f 0 869 869 0 0 inetnot - - )); DESCR("bitwise not"); diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index 775be86c1a..9e8f4ac5b6 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -78,6 +78,7 @@ DATA(insert OID = 1971 ( 405 float_ops PGNSP PGUID )); DATA(insert OID = 1974 ( 403 network_ops PGNSP PGUID )); #define NETWORK_BTREE_FAM_OID 1974 DATA(insert OID = 1975 ( 405 network_ops PGNSP PGUID )); +DATA(insert OID = 3550 ( 783 network_ops PGNSP PGUID )); DATA(insert OID = 1976 ( 403 integer_ops PGNSP PGUID )); #define INTEGER_BTREE_FAM_OID 1976 DATA(insert OID = 1977 ( 405 integer_ops PGNSP PGUID )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 21c17a08ed..7b9c5870fa 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2120,6 +2120,7 @@ DATA(insert OID = 927 ( network_sub PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1 DATA(insert OID = 928 ( network_subeq PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "869 869" _null_ _null_ _null_ _null_ network_subeq _null_ _null_ _null_ )); DATA(insert OID = 929 ( network_sup PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "869 869" 
_null_ _null_ _null_ _null_ network_sup _null_ _null_ _null_ )); DATA(insert OID = 930 ( network_supeq PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "869 869" _null_ _null_ _null_ _null_ network_supeq _null_ _null_ _null_ )); +DATA(insert OID = 3551 ( network_overlap PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "869 869" _null_ _null_ _null_ _null_ network_overlap _null_ _null_ _null_ )); /* inet/cidr functions */ DATA(insert OID = 598 ( abbrev PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "869" _null_ _null_ _null_ _null_ inet_abbrev _null_ _null_ _null_ )); @@ -2166,6 +2167,28 @@ DATA(insert OID = 2631 ( int8pl_inet PGNSP PGUID 14 1 0 0 0 f f f f t f i 2 0 DATA(insert OID = 2632 ( inetmi_int8 PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 869 "869 20" _null_ _null_ _null_ _null_ inetmi_int8 _null_ _null_ _null_ )); DATA(insert OID = 2633 ( inetmi PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 20 "869 869" _null_ _null_ _null_ _null_ inetmi _null_ _null_ _null_ )); +/* GiST support for inet and cidr */ +DATA(insert OID = 3553 ( inet_gist_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i 5 0 16 "2281 869 23 26 2281" _null_ _null_ _null_ _null_ inet_gist_consistent _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3554 ( inet_gist_union PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ inet_gist_union _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3555 ( inet_gist_compress PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2281 "2281" _null_ _null_ _null_ _null_ inet_gist_compress _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3556 ( inet_gist_decompress PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 2281 "2281" _null_ _null_ _null_ _null_ inet_gist_decompress _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3557 ( inet_gist_penalty PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "2281 2281 2281" _null_ _null_ _null_ _null_ inet_gist_penalty _null_ _null_ _null_ )); +DESCR("GiST support"); 
+DATA(insert OID = 3558 ( inet_gist_picksplit PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2281 "2281 2281" _null_ _null_ _null_ _null_ inet_gist_picksplit _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3559 ( inet_gist_same PGNSP PGUID 12 1 0 0 0 f f f f t f i 3 0 2281 "869 869 2281" _null_ _null_ _null_ _null_ inet_gist_same _null_ _null_ _null_ )); +DESCR("GiST support"); + +/* Selectivity estimation for inet and cidr */ +DATA(insert OID = 3560 ( networksel PGNSP PGUID 12 1 0 0 0 f f f f t f s 4 0 701 "2281 26 2281 23" _null_ _null_ _null_ _null_ networksel _null_ _null_ _null_ )); +DESCR("restriction selectivity for network operators"); +DATA(insert OID = 3561 ( networkjoinsel PGNSP PGUID 12 1 0 0 0 f f f f t f s 5 0 701 "2281 26 2281 21 2281" _null_ _null_ _null_ _null_ networkjoinsel _null_ _null_ _null_ )); +DESCR("join selectivity for network operators"); + DATA(insert OID = 1690 ( time_mi_time PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 1186 "1083 1083" _null_ _null_ _null_ _null_ time_mi_time _null_ _null_ _null_ )); DATA(insert OID = 1691 ( boolle PGNSP PGUID 12 1 0 0 0 f f f t t f i 2 0 16 "16 16" _null_ _null_ _null_ _null_ boolle _null_ _null_ _null_ )); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 720c831801..5907cb13fd 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -906,6 +906,7 @@ extern Datum network_sub(PG_FUNCTION_ARGS); extern Datum network_subeq(PG_FUNCTION_ARGS); extern Datum network_sup(PG_FUNCTION_ARGS); extern Datum network_supeq(PG_FUNCTION_ARGS); +extern Datum network_overlap(PG_FUNCTION_ARGS); extern Datum network_network(PG_FUNCTION_ARGS); extern Datum network_netmask(PG_FUNCTION_ARGS); extern Datum network_hostmask(PG_FUNCTION_ARGS); diff --git a/src/include/utils/inet.h b/src/include/utils/inet.h index 330f32de8f..bd31c7169a 100644 --- a/src/include/utils/inet.h +++ b/src/include/utils/inet.h @@ -53,6 +53,38 @@ typedef struct inet_struct inet_data; } 
inet; +/* + * Access macros. We use VARDATA_ANY so that we can process short-header + * varlena values without detoasting them. This requires a trick: + * VARDATA_ANY assumes the varlena header is already filled in, which is + * not the case when constructing a new value (until SET_INET_VARSIZE is + * called, which we typically can't do till the end). Therefore, we + * always initialize the newly-allocated value to zeroes (using palloc0). + * A zero length word will look like the not-1-byte case to VARDATA_ANY, + * and so we correctly construct an uncompressed value. + * + * Note that ip_addrsize(), ip_maxbits(), and SET_INET_VARSIZE() require + * the family field to be set correctly. + */ +#define ip_family(inetptr) \ + (((inet_struct *) VARDATA_ANY(inetptr))->family) + +#define ip_bits(inetptr) \ + (((inet_struct *) VARDATA_ANY(inetptr))->bits) + +#define ip_addr(inetptr) \ + (((inet_struct *) VARDATA_ANY(inetptr))->ipaddr) + +#define ip_addrsize(inetptr) \ + (ip_family(inetptr) == PGSQL_AF_INET ? 4 : 16) + +#define ip_maxbits(inetptr) \ + (ip_family(inetptr) == PGSQL_AF_INET ? 
32 : 128) + +#define SET_INET_VARSIZE(dst) \ + SET_VARSIZE(dst, VARHDRSZ + offsetof(inet_struct, ipaddr) + \ + ip_addrsize(dst)) + /* * This is the internal storage format for MAC addresses: @@ -82,4 +114,27 @@ typedef struct macaddr #define PG_GETARG_MACADDR_P(n) DatumGetMacaddrP(PG_GETARG_DATUM(n)) #define PG_RETURN_MACADDR_P(x) return MacaddrPGetDatum(x) +/* + * Support functions in network.c + */ +extern int bitncmp(const unsigned char *l, const unsigned char *r, int n); +extern int bitncommon(const unsigned char *l, const unsigned char *r, int n); + +/* + * GiST support functions in network_gist.c + */ +extern Datum inet_gist_consistent(PG_FUNCTION_ARGS); +extern Datum inet_gist_union(PG_FUNCTION_ARGS); +extern Datum inet_gist_compress(PG_FUNCTION_ARGS); +extern Datum inet_gist_decompress(PG_FUNCTION_ARGS); +extern Datum inet_gist_penalty(PG_FUNCTION_ARGS); +extern Datum inet_gist_picksplit(PG_FUNCTION_ARGS); +extern Datum inet_gist_same(PG_FUNCTION_ARGS); + +/* + * Estimation functions in network_selfuncs.c + */ +extern Datum networksel(PG_FUNCTION_ARGS); +extern Datum networkjoinsel(PG_FUNCTION_ARGS); + #endif /* INET_H */ diff --git a/src/test/regress/expected/inet.out b/src/test/regress/expected/inet.out index 356a397822..008cc0b5dd 100644 --- a/src/test/regress/expected/inet.out +++ b/src/test/regress/expected/inet.out @@ -180,27 +180,28 @@ SELECT '' AS ten, i, c, i < c AS lt, i <= c AS le, i = c AS eq, i >= c AS ge, i > c AS gt, i <> c AS ne, i << c AS sb, i <<= c AS sbe, - i >> c AS sup, i >>= c AS spe + i >> c AS sup, i >>= c AS spe, + i && c AS ovr FROM INET_TBL; - ten | i | c | lt | le | eq | ge | gt | ne | sb | sbe | sup | spe ------+------------------+--------------------+----+----+----+----+----+----+----+-----+-----+----- - | 192.168.1.226/24 | 192.168.1.0/24 | f | f | f | t | t | t | f | t | f | t - | 192.168.1.226 | 192.168.1.0/26 | f | f | f | t | t | t | f | f | f | f - | 192.168.1.0/24 | 192.168.1.0/24 | f | t | t | t | f | f | f | t | f | t 
- | 192.168.1.0/25 | 192.168.1.0/24 | f | f | f | t | t | t | t | t | f | f - | 192.168.1.255/24 | 192.168.1.0/24 | f | f | f | t | t | t | f | t | f | t - | 192.168.1.255/25 | 192.168.1.0/24 | f | f | f | t | t | t | t | t | f | f - | 10.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | t | f | t - | 10.1.2.3/8 | 10.0.0.0/32 | t | t | f | f | f | t | f | f | t | t - | 10.1.2.3 | 10.1.2.3/32 | f | t | t | t | f | f | f | t | f | t - | 10.1.2.3/24 | 10.1.2.0/24 | f | f | f | t | t | t | f | t | f | t - | 10.1.2.3/16 | 10.1.0.0/16 | f | f | f | t | t | t | f | t | f | t - | 10.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | t | f | t - | 11.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | f | f | f - | 9.1.2.3/8 | 10.0.0.0/8 | t | t | f | f | f | t | f | f | f | f - | 10:23::f1/64 | 10:23::f1/128 | t | t | f | f | f | t | f | f | t | t - | 10:23::ffff | 10:23::8000/113 | f | f | f | t | t | t | t | t | f | f - | ::4.3.2.1/24 | ::ffff:1.2.3.4/128 | t | t | f | f | f | t | f | f | t | t + ten | i | c | lt | le | eq | ge | gt | ne | sb | sbe | sup | spe | ovr +-----+------------------+--------------------+----+----+----+----+----+----+----+-----+-----+-----+----- + | 192.168.1.226/24 | 192.168.1.0/24 | f | f | f | t | t | t | f | t | f | t | t + | 192.168.1.226 | 192.168.1.0/26 | f | f | f | t | t | t | f | f | f | f | f + | 192.168.1.0/24 | 192.168.1.0/24 | f | t | t | t | f | f | f | t | f | t | t + | 192.168.1.0/25 | 192.168.1.0/24 | f | f | f | t | t | t | t | t | f | f | t + | 192.168.1.255/24 | 192.168.1.0/24 | f | f | f | t | t | t | f | t | f | t | t + | 192.168.1.255/25 | 192.168.1.0/24 | f | f | f | t | t | t | t | t | f | f | t + | 10.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | t | f | t | t + | 10.1.2.3/8 | 10.0.0.0/32 | t | t | f | f | f | t | f | f | t | t | t + | 10.1.2.3 | 10.1.2.3/32 | f | t | t | t | f | f | f | t | f | t | t + | 10.1.2.3/24 | 10.1.2.0/24 | f | f | f | t | t | t | f | t | f | t | t + | 10.1.2.3/16 | 10.1.0.0/16 | f | f | f 
| t | t | t | f | t | f | t | t + | 10.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | t | f | t | t + | 11.1.2.3/8 | 10.0.0.0/8 | f | f | f | t | t | t | f | f | f | f | f + | 9.1.2.3/8 | 10.0.0.0/8 | t | t | f | f | f | t | f | f | f | f | f + | 10:23::f1/64 | 10:23::f1/128 | t | t | f | f | f | t | f | f | t | t | t + | 10:23::ffff | 10:23::8000/113 | f | f | f | t | t | t | t | t | f | f | t + | ::4.3.2.1/24 | ::ffff:1.2.3.4/128 | t | t | f | f | f | t | f | f | t | t | t (17 rows) -- check the conversion to/from text and set_netmask @@ -226,7 +227,7 @@ SELECT '' AS ten, set_masklen(inet(text(i)), 24) FROM INET_TBL; | ::4.3.2.1/24 (17 rows) --- check that index works correctly +-- check that btree index works correctly CREATE INDEX inet_idx1 ON inet_tbl(i); SET enable_seqscan TO off; SELECT * FROM inet_tbl WHERE i<<'192.168.1.0/24'::cidr; @@ -250,6 +251,135 @@ SELECT * FROM inet_tbl WHERE i<<='192.168.1.0/24'::cidr; SET enable_seqscan TO on; DROP INDEX inet_idx1; +-- check that gist index works correctly +CREATE INDEX inet_idx2 ON inet_tbl using gist (i inet_ops); +SET enable_seqscan TO off; +SELECT * FROM inet_tbl WHERE i << '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+------------------ + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 192.168.1.226 +(3 rows) + +SELECT * FROM inet_tbl WHERE i <<= '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+------------------ + 192.168.1.0/24 | 192.168.1.0/24 + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 192.168.1.226 +(6 rows) + +SELECT * FROM inet_tbl WHERE i && '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+------------------ + 192.168.1.0/24 | 192.168.1.0/24 + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 
192.168.1.226 +(6 rows) + +SELECT * FROM inet_tbl WHERE i >>= '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+------------------ + 192.168.1.0/24 | 192.168.1.0/24 + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 +(3 rows) + +SELECT * FROM inet_tbl WHERE i >> '192.168.1.0/24'::cidr ORDER BY i; + c | i +---+--- +(0 rows) + +SELECT * FROM inet_tbl WHERE i < '192.168.1.0/24'::cidr ORDER BY i; + c | i +-------------+------------- + 10.0.0.0/8 | 9.1.2.3/8 + 10.0.0.0/32 | 10.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.1.0.0/16 | 10.1.2.3/16 + 10.1.2.0/24 | 10.1.2.3/24 + 10.1.2.3/32 | 10.1.2.3 + 10.0.0.0/8 | 11.1.2.3/8 +(8 rows) + +SELECT * FROM inet_tbl WHERE i <= '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+---------------- + 10.0.0.0/8 | 9.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.0.0.0/32 | 10.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.1.0.0/16 | 10.1.2.3/16 + 10.1.2.0/24 | 10.1.2.3/24 + 10.1.2.3/32 | 10.1.2.3 + 10.0.0.0/8 | 11.1.2.3/8 + 192.168.1.0/24 | 192.168.1.0/24 +(9 rows) + +SELECT * FROM inet_tbl WHERE i = '192.168.1.0/24'::cidr ORDER BY i; + c | i +----------------+---------------- + 192.168.1.0/24 | 192.168.1.0/24 +(1 row) + +SELECT * FROM inet_tbl WHERE i >= '192.168.1.0/24'::cidr ORDER BY i; + c | i +--------------------+------------------ + 192.168.1.0/24 | 192.168.1.0/24 + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 192.168.1.226 + ::ffff:1.2.3.4/128 | ::4.3.2.1/24 + 10:23::f1/128 | 10:23::f1/64 + 10:23::8000/113 | 10:23::ffff +(9 rows) + +SELECT * FROM inet_tbl WHERE i > '192.168.1.0/24'::cidr ORDER BY i; + c | i +--------------------+------------------ + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 192.168.1.226 + ::ffff:1.2.3.4/128 | ::4.3.2.1/24 + 10:23::f1/128 
| 10:23::f1/64 + 10:23::8000/113 | 10:23::ffff +(8 rows) + +SELECT * FROM inet_tbl WHERE i <> '192.168.1.0/24'::cidr ORDER BY i; + c | i +--------------------+------------------ + 10.0.0.0/8 | 9.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.0.0.0/32 | 10.1.2.3/8 + 10.0.0.0/8 | 10.1.2.3/8 + 10.1.0.0/16 | 10.1.2.3/16 + 10.1.2.0/24 | 10.1.2.3/24 + 10.1.2.3/32 | 10.1.2.3 + 10.0.0.0/8 | 11.1.2.3/8 + 192.168.1.0/24 | 192.168.1.226/24 + 192.168.1.0/24 | 192.168.1.255/24 + 192.168.1.0/24 | 192.168.1.0/25 + 192.168.1.0/24 | 192.168.1.255/25 + 192.168.1.0/26 | 192.168.1.226 + ::ffff:1.2.3.4/128 | ::4.3.2.1/24 + 10:23::f1/128 | 10:23::f1/64 + 10:23::8000/113 | 10:23::ffff +(16 rows) + +SET enable_seqscan TO on; +DROP INDEX inet_idx2; -- simple tests of inet boolean and arithmetic operators SELECT i, ~i AS "~i" FROM inet_tbl; i | ~i diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index bf76501435..118f7e43dc 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -1118,6 +1118,15 @@ ORDER BY 1, 2, 3; 783 | 15 | <-> 783 | 16 | @> 783 | 18 | = + 783 | 19 | <> + 783 | 20 | < + 783 | 21 | <= + 783 | 22 | > + 783 | 23 | >= + 783 | 24 | << + 783 | 25 | <<= + 783 | 26 | >> + 783 | 27 | >>= 783 | 28 | <@ 783 | 48 | <@ 783 | 68 | <@ @@ -1153,7 +1162,7 @@ ORDER BY 1, 2, 3; 4000 | 15 | > 4000 | 16 | @> 4000 | 18 | = -(71 rows) +(80 rows) -- Check that all opclass search operators have selectivity estimators. 
-- This is not absolutely required, but it seems a reasonable thing diff --git a/src/test/regress/sql/inet.sql b/src/test/regress/sql/inet.sql index 328f14907b..be078fbb84 100644 --- a/src/test/regress/sql/inet.sql +++ b/src/test/regress/sql/inet.sql @@ -52,12 +52,14 @@ SELECT '' AS ten, i, c, i < c AS lt, i <= c AS le, i = c AS eq, i >= c AS ge, i > c AS gt, i <> c AS ne, i << c AS sb, i <<= c AS sbe, - i >> c AS sup, i >>= c AS spe + i >> c AS sup, i >>= c AS spe, + i && c AS ovr FROM INET_TBL; -- check the conversion to/from text and set_netmask SELECT '' AS ten, set_masklen(inet(text(i)), 24) FROM INET_TBL; --- check that index works correctly + +-- check that btree index works correctly CREATE INDEX inet_idx1 ON inet_tbl(i); SET enable_seqscan TO off; SELECT * FROM inet_tbl WHERE i<<'192.168.1.0/24'::cidr; @@ -65,6 +67,23 @@ SELECT * FROM inet_tbl WHERE i<<='192.168.1.0/24'::cidr; SET enable_seqscan TO on; DROP INDEX inet_idx1; +-- check that gist index works correctly +CREATE INDEX inet_idx2 ON inet_tbl using gist (i inet_ops); +SET enable_seqscan TO off; +SELECT * FROM inet_tbl WHERE i << '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i <<= '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i && '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i >>= '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i >> '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i < '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i <= '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i = '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i >= '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i > '192.168.1.0/24'::cidr ORDER BY i; +SELECT * FROM inet_tbl WHERE i <> '192.168.1.0/24'::cidr ORDER BY i; +SET enable_seqscan TO on; +DROP INDEX inet_idx2; + -- simple tests of inet boolean and arithmetic operators SELECT i, ~i AS "~i" FROM inet_tbl; SELECT 
i, c, i & c AS "and" FROM inet_tbl;