Make OVERLAPS operators conform to SQL92 spec regarding NULL handling.

As I read it, the spec requires a non-null result in some cases where
one of the inputs is NULL: specifically, if the other endpoint of that
interval is between the endpoints of the other interval, then the result
is known TRUE despite the missing endpoint.  The spec could've been a
lot simpler if they did not intend this behavior.
I did not force an initdb for this change, but if you don't do one you'll
still see the old strict-function behavior.
This commit is contained in:
Tom Lane 2000-12-07 18:38:59 +00:00
parent 8bb4dab94d
commit 821f4673ff
3 changed files with 297 additions and 73 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.53 2000/12/03 14:51:01 thomas Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/date.c,v 1.54 2000/12/07 18:38:59 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -529,37 +529,122 @@ time_smaller(PG_FUNCTION_ARGS)
PG_RETURN_TIMEADT((time1 < time2) ? time1 : time2); PG_RETURN_TIMEADT((time1 < time2) ? time1 : time2);
} }
/* overlaps_time() /* overlaps_time() --- implements the SQL92 OVERLAPS operator.
* Implements the SQL92 OVERLAPS operator. *
* Algorithm from Date and Darwen, 1997 * Algorithm is per SQL92 spec. This is much harder than you'd think
* because the spec requires us to deliver a non-null answer in some cases
* where some of the inputs are null.
*/ */
Datum Datum
overlaps_time(PG_FUNCTION_ARGS) overlaps_time(PG_FUNCTION_ARGS)
{ {
TimeADT ts1 = PG_GETARG_TIMEADT(0); /* The arguments are TimeADT, but we leave them as generic Datums
TimeADT te1 = PG_GETARG_TIMEADT(1); * to avoid dereferencing nulls (TimeADT is pass-by-reference!)
TimeADT ts2 = PG_GETARG_TIMEADT(2); */
TimeADT te2 = PG_GETARG_TIMEADT(3); Datum ts1 = PG_GETARG_DATUM(0);
Datum te1 = PG_GETARG_DATUM(1);
Datum ts2 = PG_GETARG_DATUM(2);
Datum te2 = PG_GETARG_DATUM(3);
bool ts1IsNull = PG_ARGISNULL(0);
bool te1IsNull = PG_ARGISNULL(1);
bool ts2IsNull = PG_ARGISNULL(2);
bool te2IsNull = PG_ARGISNULL(3);
/* Make sure we have ordered pairs... */ #define TIMEADT_GT(t1,t2) \
if (ts1 > te1) (DatumGetTimeADT(t1) > DatumGetTimeADT(t2))
#define TIMEADT_LT(t1,t2) \
(DatumGetTimeADT(t1) < DatumGetTimeADT(t2))
/*
* If both endpoints of interval 1 are null, the result is null (unknown).
* If just one endpoint is null, take ts1 as the non-null one.
* Otherwise, take ts1 as the lesser endpoint.
*/
if (ts1IsNull)
{ {
TimeADT tt = ts1; if (te1IsNull)
PG_RETURN_NULL();
/* swap null for non-null */
ts1 = te1; ts1 = te1;
te1 = tt; te1IsNull = true;
} }
if (ts2 > te2) else if (!te1IsNull)
{ {
TimeADT tt = ts2; if (TIMEADT_GT(ts1, te1))
{
Datum tt = ts1;
ts2 = te2; ts1 = te1;
te2 = tt; te1 = tt;
}
} }
PG_RETURN_BOOL((ts1 > ts2 && (ts1 < te2 || te1 < te2)) || /* Likewise for interval 2. */
(ts1 < ts2 && (ts2 < te1 || te2 < te1)) || if (ts2IsNull)
(ts1 == ts2)); {
if (te2IsNull)
PG_RETURN_NULL();
/* swap null for non-null */
ts2 = te2;
te2IsNull = true;
}
else if (!te2IsNull)
{
if (TIMEADT_GT(ts2, te2))
{
Datum tt = ts2;
ts2 = te2;
te2 = tt;
}
}
/*
* At this point neither ts1 nor ts2 is null, so we can consider three
* cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
*/
if (TIMEADT_GT(ts1, ts2))
{
/* This case is ts1 < te2 OR te1 < te2, which may look redundant
* but in the presence of nulls it's not quite completely so.
*/
if (te2IsNull)
PG_RETURN_NULL();
if (TIMEADT_LT(ts1, te2))
PG_RETURN_BOOL(true);
if (te1IsNull)
PG_RETURN_NULL();
/* If te1 is not null then we had ts1 <= te1 above, and we just
* found ts1 >= te2, hence te1 >= te2.
*/
PG_RETURN_BOOL(false);
}
else if (TIMEADT_LT(ts1, ts2))
{
/* This case is ts2 < te1 OR te2 < te1 */
if (te1IsNull)
PG_RETURN_NULL();
if (TIMEADT_LT(ts2, te1))
PG_RETURN_BOOL(true);
if (te2IsNull)
PG_RETURN_NULL();
/* If te2 is not null then we had ts2 <= te2 above, and we just
* found ts2 >= te1, hence te2 >= te1.
*/
PG_RETURN_BOOL(false);
}
else
{
/* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
* rather silly way of saying "true if both are nonnull, else null".
*/
if (te1IsNull || te2IsNull)
PG_RETURN_NULL();
PG_RETURN_BOOL(true);
}
#undef TIMEADT_GT
#undef TIMEADT_LT
} }
/* timestamp_time() /* timestamp_time()
@ -964,53 +1049,122 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
PG_RETURN_TIMETZADT_P(result); PG_RETURN_TIMETZADT_P(result);
} }
/* overlaps_timetz() /* overlaps_timetz() --- implements the SQL92 OVERLAPS operator.
* Implements the SQL92 OVERLAPS operator. *
* Algorithm from Date and Darwen, 1997 * Algorithm is per SQL92 spec. This is much harder than you'd think
* because the spec requires us to deliver a non-null answer in some cases
* where some of the inputs are null.
*/ */
Datum Datum
overlaps_timetz(PG_FUNCTION_ARGS) overlaps_timetz(PG_FUNCTION_ARGS)
{ {
/* The arguments are TimeTzADT *, but we leave them as generic Datums /* The arguments are TimeTzADT *, but we leave them as generic Datums
* for convenience of notation. * for convenience of notation --- and to avoid dereferencing nulls.
*/ */
Datum ts1 = PG_GETARG_DATUM(0); Datum ts1 = PG_GETARG_DATUM(0);
Datum te1 = PG_GETARG_DATUM(1); Datum te1 = PG_GETARG_DATUM(1);
Datum ts2 = PG_GETARG_DATUM(2); Datum ts2 = PG_GETARG_DATUM(2);
Datum te2 = PG_GETARG_DATUM(3); Datum te2 = PG_GETARG_DATUM(3);
bool ts1IsNull = PG_ARGISNULL(0);
bool te1IsNull = PG_ARGISNULL(1);
bool ts2IsNull = PG_ARGISNULL(2);
bool te2IsNull = PG_ARGISNULL(3);
#define TIMETZ_GT(t1,t2) \ #define TIMETZ_GT(t1,t2) \
DatumGetBool(DirectFunctionCall2(timetz_gt,t1,t2)) DatumGetBool(DirectFunctionCall2(timetz_gt,t1,t2))
#define TIMETZ_LT(t1,t2) \ #define TIMETZ_LT(t1,t2) \
DatumGetBool(DirectFunctionCall2(timetz_lt,t1,t2)) DatumGetBool(DirectFunctionCall2(timetz_lt,t1,t2))
#define TIMETZ_EQ(t1,t2) \
DatumGetBool(DirectFunctionCall2(timetz_eq,t1,t2))
/* Make sure we have ordered pairs... */ /*
if (TIMETZ_GT(ts1, te1)) * If both endpoints of interval 1 are null, the result is null (unknown).
* If just one endpoint is null, take ts1 as the non-null one.
* Otherwise, take ts1 as the lesser endpoint.
*/
if (ts1IsNull)
{ {
Datum tt = ts1; if (te1IsNull)
PG_RETURN_NULL();
/* swap null for non-null */
ts1 = te1; ts1 = te1;
te1 = tt; te1IsNull = true;
} }
if (TIMETZ_GT(ts2, te2)) else if (!te1IsNull)
{ {
Datum tt = ts2; if (TIMETZ_GT(ts1, te1))
{
Datum tt = ts1;
ts2 = te2; ts1 = te1;
te2 = tt; te1 = tt;
}
} }
PG_RETURN_BOOL((TIMETZ_GT(ts1, ts2) && /* Likewise for interval 2. */
(TIMETZ_LT(ts1, te2) || TIMETZ_LT(te1, te2))) || if (ts2IsNull)
(TIMETZ_GT(ts2, ts1) && {
(TIMETZ_LT(ts2, te1) || TIMETZ_LT(te2, te1))) || if (te2IsNull)
TIMETZ_EQ(ts1, ts2)); PG_RETURN_NULL();
/* swap null for non-null */
ts2 = te2;
te2IsNull = true;
}
else if (!te2IsNull)
{
if (TIMETZ_GT(ts2, te2))
{
Datum tt = ts2;
ts2 = te2;
te2 = tt;
}
}
/*
* At this point neither ts1 nor ts2 is null, so we can consider three
* cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
*/
if (TIMETZ_GT(ts1, ts2))
{
/* This case is ts1 < te2 OR te1 < te2, which may look redundant
* but in the presence of nulls it's not quite completely so.
*/
if (te2IsNull)
PG_RETURN_NULL();
if (TIMETZ_LT(ts1, te2))
PG_RETURN_BOOL(true);
if (te1IsNull)
PG_RETURN_NULL();
/* If te1 is not null then we had ts1 <= te1 above, and we just
* found ts1 >= te2, hence te1 >= te2.
*/
PG_RETURN_BOOL(false);
}
else if (TIMETZ_LT(ts1, ts2))
{
/* This case is ts2 < te1 OR te2 < te1 */
if (te1IsNull)
PG_RETURN_NULL();
if (TIMETZ_LT(ts2, te1))
PG_RETURN_BOOL(true);
if (te2IsNull)
PG_RETURN_NULL();
/* If te2 is not null then we had ts2 <= te2 above, and we just
* found ts2 >= te1, hence te2 >= te1.
*/
PG_RETURN_BOOL(false);
}
else
{
/* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
* rather silly way of saying "true if both are nonnull, else null".
*/
if (te1IsNull || te2IsNull)
PG_RETURN_NULL();
PG_RETURN_BOOL(true);
}
#undef TIMETZ_GT #undef TIMETZ_GT
#undef TIMETZ_LT #undef TIMETZ_LT
#undef TIMETZ_EQ
} }
/* timestamp_timetz() /* timestamp_timetz()

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.39 2000/12/03 20:45:36 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/timestamp.c,v 1.40 2000/12/07 18:38:59 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -825,53 +825,123 @@ interval_hash(PG_FUNCTION_ARGS)
return hash_any((char *) key, sizeof(double) + sizeof(int4)); return hash_any((char *) key, sizeof(double) + sizeof(int4));
} }
/* overlaps_timestamp() /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
* Implements the SQL92 OVERLAPS operator. *
* Algorithm from Date and Darwen, 1997 * Algorithm is per SQL92 spec. This is much harder than you'd think
* because the spec requires us to deliver a non-null answer in some cases
* where some of the inputs are null.
*/ */
Datum Datum
overlaps_timestamp(PG_FUNCTION_ARGS) overlaps_timestamp(PG_FUNCTION_ARGS)
{ {
/* The arguments are Timestamps, but we leave them as generic Datums /* The arguments are Timestamps, but we leave them as generic Datums
* to avoid unnecessary conversions between value and reference forms... * to avoid unnecessary conversions between value and reference forms
* --- not to mention possible dereferences of null pointers.
*/ */
Datum ts1 = PG_GETARG_DATUM(0); Datum ts1 = PG_GETARG_DATUM(0);
Datum te1 = PG_GETARG_DATUM(1); Datum te1 = PG_GETARG_DATUM(1);
Datum ts2 = PG_GETARG_DATUM(2); Datum ts2 = PG_GETARG_DATUM(2);
Datum te2 = PG_GETARG_DATUM(3); Datum te2 = PG_GETARG_DATUM(3);
bool ts1IsNull = PG_ARGISNULL(0);
bool te1IsNull = PG_ARGISNULL(1);
bool ts2IsNull = PG_ARGISNULL(2);
bool te2IsNull = PG_ARGISNULL(3);
#define TIMESTAMP_GT(t1,t2) \ #define TIMESTAMP_GT(t1,t2) \
DatumGetBool(DirectFunctionCall2(timestamp_gt,t1,t2)) DatumGetBool(DirectFunctionCall2(timestamp_gt,t1,t2))
#define TIMESTAMP_LT(t1,t2) \ #define TIMESTAMP_LT(t1,t2) \
DatumGetBool(DirectFunctionCall2(timestamp_lt,t1,t2)) DatumGetBool(DirectFunctionCall2(timestamp_lt,t1,t2))
#define TIMESTAMP_EQ(t1,t2) \
DatumGetBool(DirectFunctionCall2(timestamp_eq,t1,t2))
/* Make sure we have ordered pairs... */ /*
if (TIMESTAMP_GT(ts1, te1)) * If both endpoints of interval 1 are null, the result is null (unknown).
* If just one endpoint is null, take ts1 as the non-null one.
* Otherwise, take ts1 as the lesser endpoint.
*/
if (ts1IsNull)
{ {
Datum tt = ts1; if (te1IsNull)
PG_RETURN_NULL();
/* swap null for non-null */
ts1 = te1; ts1 = te1;
te1 = tt; te1IsNull = true;
} }
if (TIMESTAMP_GT(ts2, te2)) else if (!te1IsNull)
{ {
Datum tt = ts2; if (TIMESTAMP_GT(ts1, te1))
{
Datum tt = ts1;
ts2 = te2; ts1 = te1;
te2 = tt; te1 = tt;
}
} }
PG_RETURN_BOOL((TIMESTAMP_GT(ts1, ts2) && /* Likewise for interval 2. */
(TIMESTAMP_LT(ts1, te2) || TIMESTAMP_LT(te1, te2))) || if (ts2IsNull)
(TIMESTAMP_GT(ts2, ts1) && {
(TIMESTAMP_LT(ts2, te1) || TIMESTAMP_LT(te2, te1))) || if (te2IsNull)
TIMESTAMP_EQ(ts1, ts2)); PG_RETURN_NULL();
/* swap null for non-null */
ts2 = te2;
te2IsNull = true;
}
else if (!te2IsNull)
{
if (TIMESTAMP_GT(ts2, te2))
{
Datum tt = ts2;
ts2 = te2;
te2 = tt;
}
}
/*
* At this point neither ts1 nor ts2 is null, so we can consider three
* cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
*/
if (TIMESTAMP_GT(ts1, ts2))
{
/* This case is ts1 < te2 OR te1 < te2, which may look redundant
* but in the presence of nulls it's not quite completely so.
*/
if (te2IsNull)
PG_RETURN_NULL();
if (TIMESTAMP_LT(ts1, te2))
PG_RETURN_BOOL(true);
if (te1IsNull)
PG_RETURN_NULL();
/* If te1 is not null then we had ts1 <= te1 above, and we just
* found ts1 >= te2, hence te1 >= te2.
*/
PG_RETURN_BOOL(false);
}
else if (TIMESTAMP_LT(ts1, ts2))
{
/* This case is ts2 < te1 OR te2 < te1 */
if (te1IsNull)
PG_RETURN_NULL();
if (TIMESTAMP_LT(ts2, te1))
PG_RETURN_BOOL(true);
if (te2IsNull)
PG_RETURN_NULL();
/* If te2 is not null then we had ts2 <= te2 above, and we just
* found ts2 >= te1, hence te2 >= te1.
*/
PG_RETURN_BOOL(false);
}
else
{
/* For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
* rather silly way of saying "true if both are nonnull, else null".
*/
if (te1IsNull || te2IsNull)
PG_RETURN_NULL();
PG_RETURN_BOOL(true);
}
#undef TIMESTAMP_GT #undef TIMESTAMP_GT
#undef TIMESTAMP_LT #undef TIMESTAMP_LT
#undef TIMESTAMP_EQ
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: pg_proc.h,v 1.176 2000/12/03 14:51:09 thomas Exp $ * $Id: pg_proc.h,v 1.177 2000/12/07 18:38:58 tgl Exp $
* *
* NOTES * NOTES
* The script catalog/genbki.sh reads this file and generates .bki * The script catalog/genbki.sh reads this file and generates .bki
@ -1510,7 +1510,7 @@ DESCR("absolute value");
DATA(insert OID = 1263 ( interval PGUID 12 f t f t 1 f 1186 "25" 100 0 0 100 text_interval - )); DATA(insert OID = 1263 ( interval PGUID 12 f t f t 1 f 1186 "25" 100 0 0 100 text_interval - ));
DESCR("convert text to interval"); DESCR("convert text to interval");
DATA(insert OID = 1271 ( overlaps PGUID 12 f t t t 4 f 16 "1266 1266 1266 1266" 100 0 0 100 overlaps_timetz - )); DATA(insert OID = 1271 ( overlaps PGUID 12 f t t f 4 f 16 "1266 1266 1266 1266" 100 0 0 100 overlaps_timetz - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1272 ( datetime_pl PGUID 12 f t f t 2 f 1184 "1082 1083" 100 0 0 100 datetime_timestamp - )); DATA(insert OID = 1272 ( datetime_pl PGUID 12 f t f t 2 f 1184 "1082 1083" 100 0 0 100 datetime_timestamp - ));
DESCR("convert date and time to timestamp"); DESCR("convert date and time to timestamp");
@ -1569,22 +1569,22 @@ DESCR("restriction selectivity for containment comparison operators");
DATA(insert OID = 1303 ( contjoinsel PGUID 12 f t f t 5 f 701 "26 26 21 26 21" 100 0 0 100 contjoinsel - )); DATA(insert OID = 1303 ( contjoinsel PGUID 12 f t f t 5 f 701 "26 26 21 26 21" 100 0 0 100 contjoinsel - ));
DESCR("join selectivity for containment comparison operators"); DESCR("join selectivity for containment comparison operators");
DATA(insert OID = 1304 ( overlaps PGUID 12 f t t t 4 f 16 "1184 1184 1184 1184" 100 0 0 100 overlaps_timestamp - )); DATA(insert OID = 1304 ( overlaps PGUID 12 f t t f 4 f 16 "1184 1184 1184 1184" 100 0 0 100 overlaps_timestamp - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1305 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1186 1184 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); DATA(insert OID = 1305 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1186 1184 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1306 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1184 1184 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); DATA(insert OID = 1306 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1184 1184 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1307 ( overlaps PGUID 14 f t t t 4 f 16 "1184 1186 1184 1184" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); DATA(insert OID = 1307 ( overlaps PGUID 14 f t t f 4 f 16 "1184 1186 1184 1184" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1308 ( overlaps PGUID 12 f t t t 4 f 16 "1083 1083 1083 1083" 100 0 0 100 overlaps_time - )); DATA(insert OID = 1308 ( overlaps PGUID 12 f t t f 4 f 16 "1083 1083 1083 1083" 100 0 0 100 overlaps_time - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1309 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1186 1083 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - )); DATA(insert OID = 1309 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1186 1083 1186" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1310 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1083 1083 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - )); DATA(insert OID = 1310 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1083 1083 1186" 100 0 0 100 "select ($1, $2) overlaps ($3, ($3 + $4))" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1311 ( overlaps PGUID 14 f t t t 4 f 16 "1083 1186 1083 1083" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - )); DATA(insert OID = 1311 ( overlaps PGUID 14 f t t f 4 f 16 "1083 1186 1083 1083" 100 0 0 100 "select ($1, ($1 + $2)) overlaps ($3, $4)" - ));
DESCR("SQL92 interval comparison"); DESCR("SQL92 interval comparison");
DATA(insert OID = 1314 ( timestamp_cmp PGUID 12 f t f t 2 f 23 "1184 1184" 100 0 0 100 timestamp_cmp - )); DATA(insert OID = 1314 ( timestamp_cmp PGUID 12 f t f t 2 f 23 "1184 1184" 100 0 0 100 timestamp_cmp - ));