Implement width_bucket() for the float8 data type.

The implementation is somewhat ugly logic-wise, but I don't see an
easy way to make it more concise.

When writing this, I noticed that my previous implementation of
width_bucket() doesn't handle NaN correctly:

    postgres=# select width_bucket('NaN', 1, 5, 5);
     width_bucket
    --------------
                6
    (1 row)

AFAICS SQL:2003 does not define a NaN value, so it doesn't address how
width_bucket() should behave here. The patch changes width_bucket() so
that ereport(ERROR) is raised if NaN is specified for the operand or the
lower or upper bounds to width_bucket(). For float8, NaN is disallowed
for any of the floating-point inputs, and +/- infinity is disallowed
for the histogram bounds (but allowed for the operand).

Update docs and regression tests, bump the catversion.
This commit is contained in:
Neil Conway 2007-01-16 21:41:14 +00:00
parent da07c81fe3
commit cf57ef4e50
8 changed files with 210 additions and 69 deletions

View File

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.350 2007/01/12 23:34:54 tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.351 2007/01/16 21:41:12 neilc Exp $ -->
<chapter id="functions">
<title>Functions and Operators</title>
@ -854,6 +854,16 @@
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
<entry><literal>3</literal></entry>
</row>
<row>
<entry><literal><function>width_bucket</function>(<parameter>op</parameter> <type>dp</type>, <parameter>b1</parameter> <type>dp</type>, <parameter>b2</parameter> <type>dp</type>, <parameter>count</parameter> <type>int</type>)</literal></entry>
<entry><type>int</type></entry>
<entry>return the bucket to which <parameter>operand</> would
be assigned in an equidepth histogram with <parameter>count</>
buckets, in the range <parameter>b1</> to <parameter>b2</></entry>
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
<entry><literal>3</literal></entry>
</row>
</tbody>
</tgroup>
</table>

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.146 2007/01/06 20:21:29 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/float.c,v 1.147 2007/01/16 21:41:13 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -2745,6 +2745,88 @@ float84ge(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(float8_cmp_internal(arg1, arg2) >= 0);
}
/*
* Implements the float8 version of the width_bucket() function
* defined by SQL2003. See also width_bucket_numeric().
*
* 'bound1' and 'bound2' are the lower and upper bounds of the
* histogram's range, respectively. 'count' is the number of buckets
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
* lower bound is assigned to bucket 0. An operand greater than the
* upper bound is assigned to an additional bucket (with number
* count+1). We don't allow "NaN" for any of the float8 inputs, and we
* don't allow either of the histogram bounds to be +/- infinity.
*/
Datum
width_bucket_float8(PG_FUNCTION_ARGS)
{
float8 operand = PG_GETARG_FLOAT8(0);
float8 bound1 = PG_GETARG_FLOAT8(1);
float8 bound2 = PG_GETARG_FLOAT8(2);
int32 count = PG_GETARG_INT32(3);
int32 result;
if (count <= 0.0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
if (isnan(operand) || isnan(bound1) || isnan(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("operand, lower bound and upper bound cannot be NaN")));
/* Note that we allow "operand" to be infinite */
if (is_infinite(bound1) || is_infinite(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("lower and upper bounds must be finite")));
if (bound1 < bound2)
{
if (operand < bound1)
result = 0;
else if (operand >= bound2)
{
result = count + 1;
/* check for overflow */
if (result < count)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
else
result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1;
}
else if (bound1 > bound2)
{
if (operand > bound1)
result = 0;
else if (operand <= bound2)
{
result = count + 1;
/* check for overflow */
if (result < count)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
else
result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("lower bound cannot equal upper bound")));
result = 0; /* keep the compiler quiet */
}
PG_RETURN_INT32(result);
}
/* ========== PRIVATE ROUTINES ========== */
#ifndef HAVE_CBRT

View File

@ -14,7 +14,7 @@
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.98 2007/01/05 22:19:41 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.99 2007/01/16 21:41:13 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -874,16 +874,17 @@ numeric_floor(PG_FUNCTION_ARGS)
}
/*
* width_bucket_numeric() -
* Implements the numeric version of the width_bucket() function
* defined by SQL2003. See also width_bucket_float8().
*
* 'bound1' and 'bound2' are the lower and upper bounds of the
* histogram's range, respectively. 'count' is the number of buckets
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs in for an equiwidth histogram
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
* lower bound is assigned to bucket 0. An operand greater than the
* upper bound is assigned to an additional bucket (with number
* count+1).
* count+1). We don't allow "NaN" for any of the numeric arguments.
*/
Datum
width_bucket_numeric(PG_FUNCTION_ARGS)
@ -901,6 +902,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
if (NUMERIC_IS_NAN(operand) ||
NUMERIC_IS_NAN(bound1) ||
NUMERIC_IS_NAN(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("operand, lower bound and upper bound cannot be NaN")));
init_var(&result_var);
init_var(&count_var);
@ -937,6 +945,7 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
break;
}
/* if result exceeds the range of a legal int4, we ereport here */
result = numericvar_to_int4(&result_var);
free_var(&count_var);
@ -946,8 +955,6 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
}
/*
* compute_bucket() -
*
* If 'operand' is not outside the bucket range, determine the correct
* bucket for it to go. The calculations performed by this function
* are derived directly from the SQL2003 spec.

View File

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.371 2007/01/09 02:14:15 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.372 2007/01/16 21:41:13 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200701081
#define CATALOG_VERSION_NO 200701161
#endif

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.436 2007/01/05 22:19:53 momjian Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.437 2007/01/16 21:41:13 neilc Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@ -636,6 +636,8 @@ DATA(insert OID = 309 ( float84gt PGNSP PGUID 12 f f t f i 2 16 "701 700" _
DESCR("greater-than");
DATA(insert OID = 310 ( float84ge PGNSP PGUID 12 f f t f i 2 16 "701 700" _null_ _null_ _null_ float84ge - _null_ ));
DESCR("greater-than-or-equal");
DATA(insert OID = 320 ( width_bucket PGNSP PGUID 12 f f t f i 4 23 "701 701 701 23" _null_ _null_ _null_ width_bucket_float8 - _null_ ));
DESCR("bucket number of operand in equidepth histogram");
DATA(insert OID = 311 ( float8 PGNSP PGUID 12 f f t f i 1 701 "700" _null_ _null_ _null_ ftod - _null_ ));
DESCR("convert float4 to float8");

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.285 2007/01/05 22:19:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.286 2007/01/16 21:41:14 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -380,6 +380,7 @@ extern Datum float84lt(PG_FUNCTION_ARGS);
extern Datum float84le(PG_FUNCTION_ARGS);
extern Datum float84gt(PG_FUNCTION_ARGS);
extern Datum float84ge(PG_FUNCTION_ARGS);
extern Datum width_bucket_float8(PG_FUNCTION_ARGS);
/* dbsize.c */
extern Datum pg_tablespace_size_oid(PG_FUNCTION_ARGS);

View File

@ -730,55 +730,77 @@ SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
(7 rows)
DROP TABLE ceil_floor_round;
-- Testing for width_bucket()
-- NULL result
SELECT width_bucket(NULL, NULL, NULL, NULL);
width_bucket
--------------
(1 row)
-- Testing for width_bucket(). For convenience, we test both the
-- numeric and float8 versions of the function in this file.
-- errors
SELECT width_bucket(5.0, 3.0, 4.0, 0);
ERROR: count must be greater than zero
SELECT width_bucket(5.0, 3.0, 4.0, -5);
ERROR: count must be greater than zero
SELECT width_bucket(3.0, 3.0, 3.0, 888);
SELECT width_bucket(3.5, 3.0, 3.0, 888);
ERROR: lower bound cannot equal upper bound
SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
ERROR: count must be greater than zero
SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
ERROR: count must be greater than zero
SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
ERROR: lower bound cannot equal upper bound
SELECT width_bucket('NaN', 3.0, 4.0, 888);
ERROR: operand, lower bound and upper bound cannot be NaN
SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
ERROR: operand, lower bound and upper bound cannot be NaN
-- normal operation
CREATE TABLE width_bucket_test (operand numeric);
COPY width_bucket_test FROM stdin;
CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
COPY width_bucket_test (operand_num) FROM stdin;
UPDATE width_bucket_test SET operand_f8 = operand_num::float8;
SELECT
operand,
width_bucket(operand, 0, 10, 5) AS wb_1,
width_bucket(operand, 10, 0, 5) AS wb_2,
width_bucket(operand, 2, 8, 4) AS wb_3,
width_bucket(operand, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand, -25, 25, 10) AS wb_5
operand_num, operand_f8,
width_bucket(operand_num, 0, 10, 5) AS wb_1,
width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
width_bucket(operand_num, 10, 0, 5) AS wb_2,
width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
width_bucket(operand_num, 2, 8, 4) AS wb_3,
width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
width_bucket(operand_num, -25, 25, 10) AS wb_5,
width_bucket(operand_f8, -25, 25, 10) AS wb_5f
FROM width_bucket_test;
operand | wb_1 | wb_2 | wb_3 | wb_4 | wb_5
------------------+------+------+------+------+------
-5.2 | 0 | 6 | 0 | 0 | 4
-0.0000000000001 | 0 | 6 | 0 | 0 | 5
0.0000000000001 | 1 | 5 | 0 | 0 | 6
1 | 1 | 5 | 0 | 0 | 6
1.99999999999999 | 1 | 5 | 0 | 0 | 6
2 | 2 | 5 | 1 | 0 | 6
2.00000000000001 | 2 | 4 | 1 | 0 | 6
3 | 2 | 4 | 1 | 0 | 6
4 | 3 | 4 | 2 | 0 | 6
4.5 | 3 | 3 | 2 | 0 | 6
5 | 3 | 3 | 3 | 1 | 7
5.5 | 3 | 3 | 3 | 21 | 7
6 | 4 | 3 | 3 | 21 | 7
7 | 4 | 2 | 4 | 21 | 7
8 | 5 | 2 | 5 | 21 | 7
9 | 5 | 1 | 5 | 21 | 7
9.99999999999999 | 5 | 1 | 5 | 21 | 7
10 | 6 | 1 | 5 | 21 | 8
10.0000000000001 | 6 | 0 | 5 | 21 | 8
NaN | 6 | 0 | 5 | 21 | 11
(20 rows)
operand_num | operand_f8 | wb_1 | wb_1f | wb_2 | wb_2f | wb_3 | wb_3f | wb_4 | wb_4f | wb_5 | wb_5f
------------------+------------------+------+-------+------+-------+------+-------+------+-------+------+-------
-5.2 | -5.2 | 0 | 0 | 6 | 6 | 0 | 0 | 0 | 0 | 4 | 4
-0.0000000001 | -1e-10 | 0 | 0 | 6 | 6 | 0 | 0 | 0 | 0 | 5 | 5
0.000000000001 | 1e-12 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6
1 | 1 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6
1.99999999999999 | 1.99999999999999 | 1 | 1 | 5 | 5 | 0 | 0 | 0 | 0 | 6 | 6
2 | 2 | 2 | 2 | 5 | 5 | 1 | 1 | 0 | 0 | 6 | 6
2.00000000000001 | 2.00000000000001 | 2 | 2 | 4 | 4 | 1 | 1 | 0 | 0 | 6 | 6
3 | 3 | 2 | 2 | 4 | 4 | 1 | 1 | 0 | 0 | 6 | 6
4 | 4 | 3 | 3 | 4 | 4 | 2 | 2 | 0 | 0 | 6 | 6
4.5 | 4.5 | 3 | 3 | 3 | 3 | 2 | 2 | 0 | 0 | 6 | 6
5 | 5 | 3 | 3 | 3 | 3 | 3 | 3 | 1 | 1 | 7 | 7
5.5 | 5.5 | 3 | 3 | 3 | 3 | 3 | 3 | 21 | 21 | 7 | 7
6 | 6 | 4 | 4 | 3 | 3 | 3 | 3 | 21 | 21 | 7 | 7
7 | 7 | 4 | 4 | 2 | 2 | 4 | 4 | 21 | 21 | 7 | 7
8 | 8 | 5 | 5 | 2 | 2 | 5 | 5 | 21 | 21 | 7 | 7
9 | 9 | 5 | 5 | 1 | 1 | 5 | 5 | 21 | 21 | 7 | 7
9.99999999999999 | 9.99999999999999 | 5 | 5 | 1 | 1 | 5 | 5 | 21 | 21 | 7 | 7
10 | 10 | 6 | 6 | 1 | 1 | 5 | 5 | 21 | 21 | 8 | 8
10.0000000000001 | 10.0000000000001 | 6 | 6 | 0 | 0 | 5 | 5 | 21 | 21 | 8 | 8
(19 rows)
-- for float8 only, check positive and negative infinity: we require
-- finite bucket bounds, but allow an infinite operand
SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error
ERROR: lower and upper bounds must be finite
SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
ERROR: lower and upper bounds must be finite
SELECT width_bucket('Infinity'::float8, 1, 10, 10),
width_bucket('-Infinity'::float8, 1, 10, 10);
width_bucket | width_bucket
--------------+--------------
11 | 0
(1 row)
DROP TABLE width_bucket_test;
-- TO_CHAR()
@ -800,7 +822,7 @@ SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999')
(10 rows)
SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999')
FROM num_data;
FROM num_data;
to_char_2 | to_char
-----------+--------------------------------------------
| .000,000,000,000,000

View File

@ -667,22 +667,26 @@ INSERT INTO ceil_floor_round VALUES ('-0.000001');
SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
DROP TABLE ceil_floor_round;
-- Testing for width_bucket()
-- NULL result
SELECT width_bucket(NULL, NULL, NULL, NULL);
-- Testing for width_bucket(). For convenience, we test both the
-- numeric and float8 versions of the function in this file.
-- errors
SELECT width_bucket(5.0, 3.0, 4.0, 0);
SELECT width_bucket(5.0, 3.0, 4.0, -5);
SELECT width_bucket(3.0, 3.0, 3.0, 888);
SELECT width_bucket(3.5, 3.0, 3.0, 888);
SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0);
SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5);
SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888);
SELECT width_bucket('NaN', 3.0, 4.0, 888);
SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888);
-- normal operation
CREATE TABLE width_bucket_test (operand numeric);
CREATE TABLE width_bucket_test (operand_num numeric, operand_f8 float8);
COPY width_bucket_test FROM stdin;
COPY width_bucket_test (operand_num) FROM stdin;
-5.2
-0.0000000000001
0.0000000000001
-0.0000000001
0.000000000001
1
1.99999999999999
2
@ -699,18 +703,31 @@ COPY width_bucket_test FROM stdin;
9.99999999999999
10
10.0000000000001
NaN
\.
UPDATE width_bucket_test SET operand_f8 = operand_num::float8;
SELECT
operand,
width_bucket(operand, 0, 10, 5) AS wb_1,
width_bucket(operand, 10, 0, 5) AS wb_2,
width_bucket(operand, 2, 8, 4) AS wb_3,
width_bucket(operand, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand, -25, 25, 10) AS wb_5
operand_num, operand_f8,
width_bucket(operand_num, 0, 10, 5) AS wb_1,
width_bucket(operand_f8, 0, 10, 5) AS wb_1f,
width_bucket(operand_num, 10, 0, 5) AS wb_2,
width_bucket(operand_f8, 10, 0, 5) AS wb_2f,
width_bucket(operand_num, 2, 8, 4) AS wb_3,
width_bucket(operand_f8, 2, 8, 4) AS wb_3f,
width_bucket(operand_num, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand_f8, 5.0, 5.5, 20) AS wb_4f,
width_bucket(operand_num, -25, 25, 10) AS wb_5,
width_bucket(operand_f8, -25, 25, 10) AS wb_5f
FROM width_bucket_test;
-- for float8 only, check positive and negative infinity: we require
-- finite bucket bounds, but allow an infinite operand
SELECT width_bucket(0.0::float8, 'Infinity'::float8, 5, 10); -- error
SELECT width_bucket(0.0::float8, 5, '-Infinity'::float8, 20); -- error
SELECT width_bucket('Infinity'::float8, 1, 10, 10),
width_bucket('-Infinity'::float8, 1, 10, 10);
DROP TABLE width_bucket_test;
-- TO_CHAR()
@ -719,7 +736,7 @@ SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999')
FROM num_data;
SELECT '' AS to_char_2, to_char(val, '9G999G999G999G999G999D999G999G999G999G999')
FROM num_data;
FROM num_data;
SELECT '' AS to_char_3, to_char(val, '9999999999999999.999999999999999PR')
FROM num_data;