Give inet/cidr datatypes their own hash function that ignores the inet vs
cidr type bit, the same as network_eq does.  This is needed for hash joins
and hash aggregation to work correctly on these types.  Per bug report
from Michael Fuhr, 2004-04-13.

Also, improve hash function for int8 as suggested by Greg Stark.
This commit is contained in:
Tom Lane 2004-06-13 21:57:28 +00:00
parent 0e338bba42
commit 950d047ec5
9 changed files with 77 additions and 18 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.39 2003/11/29 19:51:40 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.40 2004/06/13 21:57:24 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@ -44,8 +44,26 @@ hashint4(PG_FUNCTION_ARGS)
/*
 * hashint8 -- hash support function taking one int8 (64-bit) argument.
 *
 * Returns a 32-bit hash value.  When 64-bit arithmetic is usable the two
 * 32-bit halves of the argument are folded together; otherwise only the
 * low 32 bits take part in the hash.
 */
Datum
hashint8(PG_FUNCTION_ARGS)
{
/* we just use the low 32 bits... */
/*
* The idea here is to produce a hash value compatible with the values
* produced by hashint4 and hashint2 for logically equivalent inputs;
* this is necessary if we ever hope to support cross-type hash joins
* across these input types. Since all three types are signed, we can
* xor the high half of the int8 value if the sign is positive, or the
* complement of the high half when the sign is negative.
*/
#ifndef INT64_IS_BUSTED
int64 val = PG_GETARG_INT64(0);
/* split the argument into its low and high 32-bit halves */
uint32 lohalf = (uint32) val;
uint32 hihalf = (uint32) (val >> 32);
/*
 * For a value that fits in 32 bits the high half is pure sign extension
 * (all zeroes if non-negative, all ones if negative), so the term xor'ed
 * in below is zero and lohalf is left unchanged.  Such inputs therefore
 * hash exactly as they do in the low-32-bits-only branch further down.
 */
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
/* final bitwise complement, the same step the fallback branch applies */
PG_RETURN_UINT32(~lohalf);
#else
/* here if we can't count on "x >> 32" to work sanely */
PG_RETURN_UINT32(~((uint32) PG_GETARG_INT64(0)));
#endif
}
Datum

View File

@ -1,18 +1,18 @@
/*
* PostgreSQL type definitions for the INET and CIDR types.
*
* $PostgreSQL: pgsql/src/backend/utils/adt/network.c,v 1.51 2004/06/13 19:56:50 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/network.c,v 1.52 2004/06/13 21:57:25 tgl Exp $
*
* Jon Postel RIP 16 Oct 1998
*/
#include "postgres.h"
#include <errno.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include "access/hash.h"
#include "catalog/pg_type.h"
#include "libpq/ip.h"
#include "libpq/libpq-be.h"
@ -42,7 +42,7 @@ static int ip_addrsize(inet *inetptr);
(((inet_struct *)VARDATA(inetptr))->type)
#define ip_addr(inetptr) \
(((inet_struct *)VARDATA(inetptr))->ip_addr)
(((inet_struct *)VARDATA(inetptr))->ipaddr)
#define ip_maxbits(inetptr) \
(ip_family(inetptr) == PGSQL_AF_INET ? 32 : 128)
@ -60,7 +60,7 @@ ip_addrsize(inet *inetptr)
case PGSQL_AF_INET6:
return 16;
default:
return -1;
return 0;
}
}
@ -424,6 +424,27 @@ network_ne(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(network_cmp_internal(a1, a2) != 0);
}
/*
 * hashinet -- hash support function for inet/cidr values.
 *
 * network_cmp looks only at ip_family, ip_bits, and ip_addr, so the hash
 * key is assembled from exactly those three fields.  The type flag must
 * stay out of the key; otherwise values that compare equal could hash
 * differently.
 *
 * Key layout: one byte of family, one byte of bits, then the address
 * bytes (as many as ip_addrsize reports).
 */
Datum
hashinet(PG_FUNCTION_ARGS)
{
	inet	   *value = PG_GETARG_INET_P(0);
	int			nbytes = ip_addrsize(value);
	unsigned char buf[sizeof(inet_struct)];
	unsigned char *cursor = buf;

	/* the two header bytes plus the address must fit in the scratch buffer */
	Assert(nbytes + 2 <= sizeof(buf));

	*cursor++ = ip_family(value);
	*cursor++ = ip_bits(value);
	memcpy(cursor, ip_addr(value), nbytes);

	return hash_any(buf, nbytes + 2);
}
/*
* Boolean network-inclusion tests.
*/

View File

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.234 2004/06/06 19:06:59 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.235 2004/06/13 21:57:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200406061
#define CATALOG_VERSION_NO 200406131
#endif

View File

@ -19,7 +19,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.48 2004/03/22 01:38:17 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.49 2004/06/13 21:57:25 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@ -136,11 +136,11 @@ DATA(insert ( 2234 0 1 381 ));
/* hash */
DATA(insert ( 427 0 1 1080 ));
DATA(insert ( 431 0 1 454 ));
DATA(insert ( 433 0 1 456 ));
DATA(insert ( 433 0 1 422 ));
DATA(insert ( 435 0 1 450 ));
DATA(insert ( 1971 0 1 451 ));
DATA(insert ( 1973 0 1 452 ));
DATA(insert ( 1975 0 1 456 ));
DATA(insert ( 1975 0 1 422 ));
DATA(insert ( 1977 0 1 449 ));
DATA(insert ( 1979 0 1 450 ));
DATA(insert ( 1981 0 1 949 ));

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.336 2004/06/13 19:56:51 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.337 2004/06/13 21:57:26 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@ -851,6 +851,8 @@ DATA(insert OID = 398 ( hashint2vector PGNSP PGUID 12 f f t f i 1 23 "22" _n
DESCR("hash");
DATA(insert OID = 399 ( hashmacaddr PGNSP PGUID 12 f f t f i 1 23 "829" _null_ hashmacaddr - _null_ ));
DESCR("hash");
DATA(insert OID = 422 ( hashinet PGNSP PGUID 12 f f t f i 1 23 "869" _null_ hashinet - _null_ ));
DESCR("hash");
DATA(insert OID = 458 ( text_larger PGNSP PGUID 12 f f t f i 2 25 "25 25" _null_ text_larger - _null_ ));
DESCR("larger of two");
DATA(insert OID = 459 ( text_smaller PGNSP PGUID 12 f f t f i 2 25 "25 25" _null_ text_smaller - _null_ ));

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.242 2004/06/13 19:56:52 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.243 2004/06/13 21:57:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -664,6 +664,7 @@ extern Datum network_eq(PG_FUNCTION_ARGS);
extern Datum network_ge(PG_FUNCTION_ARGS);
extern Datum network_gt(PG_FUNCTION_ARGS);
extern Datum network_ne(PG_FUNCTION_ARGS);
extern Datum hashinet(PG_FUNCTION_ARGS);
extern Datum network_sub(PG_FUNCTION_ARGS);
extern Datum network_subeq(PG_FUNCTION_ARGS);
extern Datum network_sup(PG_FUNCTION_ARGS);

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/inet.h,v 1.17 2003/11/29 22:41:15 pgsql Exp $
* $PostgreSQL: pgsql/src/include/utils/inet.h,v 1.18 2004/06/13 21:57:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -20,10 +20,10 @@
*/
typedef struct
{
unsigned char family;
unsigned char bits;
unsigned char type;
unsigned char ip_addr[16]; /* 128 bits of address */
unsigned char family; /* PGSQL_AF_INET or PGSQL_AF_INET6 */
unsigned char bits; /* number of bits in netmask */
unsigned char type; /* 0 = inet, 1 = cidr */
unsigned char ipaddr[16]; /* up to 128 bits of address */
} inet_struct;
/*

View File

@ -487,6 +487,16 @@ WHERE p1.oprcanhash AND NOT EXISTS
-----+---------
(0 rows)
-- And the converse.
SELECT p1.oid, p1.oprname, op.opcname
FROM pg_operator AS p1, pg_opclass op, pg_amop p
WHERE amopopr = p1.oid AND amopclaid = op.oid
AND opcamid = (SELECT oid FROM pg_am WHERE amname = 'hash')
AND NOT p1.oprcanhash;
oid | oprname | opcname
-----+---------+---------
(0 rows)
-- Check that each operator defined in pg_operator matches its oprcode entry
-- in pg_proc. Easiest to do this separately for each oprkind.
SELECT p1.oid, p1.oprname, p2.oid, p2.proname

View File

@ -409,6 +409,13 @@ WHERE p1.oprcanhash AND NOT EXISTS
WHERE opcamid = (SELECT oid FROM pg_am WHERE amname = 'hash') AND
amopopr = p1.oid);
-- And the converse.
SELECT p1.oid, p1.oprname, op.opcname
FROM pg_operator AS p1, pg_opclass op, pg_amop p
WHERE amopopr = p1.oid AND amopclaid = op.oid
AND opcamid = (SELECT oid FROM pg_am WHERE amname = 'hash')
AND NOT p1.oprcanhash;
-- Check that each operator defined in pg_operator matches its oprcode entry
-- in pg_proc. Easiest to do this separately for each oprkind.