From 9d5c0af5862cc0141ea16ca90a9e01250716af37 Mon Sep 17 00:00:00 2001 From: "Marc G. Fournier" Date: Thu, 3 Apr 1997 19:56:47 +0000 Subject: [PATCH] From: Thomas Lockhart Subject: [HACKERS] Aggregate function patches Here are the aggregate function patches I originally sent in last December. They fix sum() and avg() behavior for ints and floats when NULL values are involved. I was waiting to resubmit these until I had a chance to write a v6.0->v6.1 database upgrade script to ensure that existing v6.0 databases which have not been reloaded for v6.1 do no break with the new aggregate behavior. These scripts are included below. It's OK with me if someone wants to do something different with the upgrade strategy, but something like this was discussed a few weeks ago. Also, there were a couple of small items which cropped up in doing a clean install of 970403 (actually 970402 + 970403 changes since the full 970403 tar file appears to be damaged or at least suspect). They are the first two patches below and can be omitted if desired (although I think they aren't dangerous :). --- src/backend/executor/nodeAgg.c | 7 +++- src/backend/optimizer/geqo/geqo_eval.c | 6 ++- src/backend/tcop/aclchk.c | 4 +- src/include/catalog/pg_aggregate.h | 28 ++++++------- src/update6_0-6_1.sh | 56 ++++++++++++++++++++++++++ src/update6_0-6_1.sql | 47 +++++++++++++++++++++ 6 files changed, 127 insertions(+), 21 deletions(-) create mode 100644 src/update6_0-6_1.sh create mode 100644 src/update6_0-6_1.sql diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 4f1a5902f0..c6e5b269cd 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -368,7 +368,12 @@ ExecAgg(Agg *node) char *args[2]; AggFuncInfo *aggfns = &aggFuncInfo[i]; - if (aggfns->finalfn && nTuplesAgged > 0) { + if (noInitValue[i]) { + /* + * No values found for this agg; return current state. + * This seems to fix behavior for avg() aggregate. -tgl 12/96 + */ + } else if (aggfns->finalfn && nTuplesAgged > 0) { if (aggfns->finalfn_nargs > 1) { args[0] = (char*)value1[i]; args[1] = (char*)value2[i]; diff --git a/src/backend/optimizer/geqo/geqo_eval.c b/src/backend/optimizer/geqo/geqo_eval.c index 72a10ee9df..cec12ae0ea 100644 --- a/src/backend/optimizer/geqo/geqo_eval.c +++ b/src/backend/optimizer/geqo/geqo_eval.c @@ -5,7 +5,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: geqo_eval.c,v 1.6 1997/03/03 23:26:45 scrappy Exp $ + * $Id: geqo_eval.c,v 1.7 1997/04/03 19:55:35 scrappy Exp $ * *------------------------------------------------------------------------- */ @@ -23,7 +23,9 @@ #include #ifdef HAVE_LIMITS_H # include -# define MAXINT INT_MAX +# ifndef MAXINT +# define MAXINT INT_MAX +# endif #else # include #endif diff --git a/src/backend/tcop/aclchk.c b/src/backend/tcop/aclchk.c index 20748e16c6..7ca6bdd33b 100644 --- a/src/backend/tcop/aclchk.c +++ b/src/backend/tcop/aclchk.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.7 1997/03/12 20:48:17 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/Attic/aclchk.c,v 1.8 1997/04/03 19:55:12 scrappy Exp $ * * NOTES * See acl.h. @@ -277,7 +277,7 @@ aclcheck(Acl *acl, AclId id, AclIdType idtype, AclMode mode) * the system never creates an empty ACL. */ if (num < 1) { -#ifdef ACLDEBUG_TRACE || 1 +#if ACLDEBUG_TRACE || 1 elog(DEBUG, "aclcheck: zero-length ACL, returning 1"); #endif return ACLCHECK_OK; diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index b2d9b4afb9..dcf7a79df6 100644 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -7,7 +7,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: pg_aggregate.h,v 1.4 1997/04/02 18:36:09 scrappy Exp $ + * $Id: pg_aggregate.h,v 1.5 1997/04/03 19:56:19 scrappy Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -19,7 +19,7 @@ #define PG_AGGREGATE_H /* ---------------- - * postgres.h contains the system type definintions and the + * postgres.h contains the system type definitions and the * CATALOG(), BOOTSTRAP and DATA() sugar words so this file * can be read by both genbki.sh and the C compiler. * ---------------- @@ -87,15 +87,15 @@ typedef FormData_pg_aggregate *Form_pg_aggregate; * --------------- */ -DATA(insert OID = 0 ( avg PGUID int4pl int4inc int4div 23 23 23 23 0 0 )); -DATA(insert OID = 0 ( avg PGUID int2pl int2inc int2div 21 21 21 21 0 0 )); -DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div 700 700 700 700 0.0 0.0 )); -DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div 701 701 701 701 0.0 0.0 )); +DATA(insert OID = 0 ( avg PGUID int4pl int4inc int4div 23 23 23 23 _null_ 0 )); +DATA(insert OID = 0 ( avg PGUID int2pl int2inc int2div 21 21 21 21 _null_ 0 )); +DATA(insert OID = 0 ( avg PGUID float4pl float4inc float4div 700 700 700 700 _null_ 0.0 )); +DATA(insert OID = 0 ( avg PGUID float8pl float8inc float8div 701 701 701 701 _null_ 0.0 )); -DATA(insert OID = 0 ( sum PGUID int4pl - - 23 23 0 23 0 _null_ )); -DATA(insert OID = 0 ( sum PGUID int2pl - - 21 21 0 21 0 _null_ )); -DATA(insert OID = 0 ( sum PGUID float4pl - - 700 700 0 700 0.0 _null_ )); -DATA(insert OID = 0 ( sum PGUID float8pl - - 701 701 0 701 0.0 _null_ )); +DATA(insert OID = 0 ( sum PGUID int4pl - - 23 23 0 23 _null_ _null_ )); +DATA(insert OID = 0 ( sum PGUID int2pl - - 21 21 0 21 _null_ _null_ )); +DATA(insert OID = 0 ( sum PGUID float4pl - - 700 700 0 700 _null_ _null_ )); +DATA(insert OID = 0 ( sum PGUID float8pl - - 701 701 0 701 _null_ _null_ )); DATA(insert OID = 0 ( max PGUID int4larger - - 23 23 0 23 _null_ _null_ )); DATA(insert OID = 0 ( max PGUID int2larger - - 21 21 0 21 _null_ _null_ )); @@ -113,10 +113,10 @@ DATA(insert OID = 0 ( min PGUID int4smaller - - 702 702 0 702 _null_ _nul DATA(insert OID = 0 ( min PGUID date_smaller - - 1082 1082 0 1082 _null_ _null_ )); DATA(insert OID = 0 ( min PGUID float8smaller - - 1084 1084 0 1084 _null_ _null_ )); -DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 )); +DATA(insert OID = 0 ( count PGUID - int4inc - 0 0 23 23 _null_ 0 )); /* - * prototypes for fucnctions in pg_aggregate.c + * prototypes for functions in pg_aggregate.c */ extern void AggregateCreate(char *aggName, char *aggtransfn1Name, @@ -131,7 +131,3 @@ extern char *AggNameGetInitVal(char *aggName, Oid basetype, int xfuncno, bool *isNull); #endif /* PG_AGGREGATE_H */ - - - - diff --git a/src/update6_0-6_1.sh b/src/update6_0-6_1.sh new file mode 100644 index 0000000000..44b3c9aa7b --- /dev/null +++ b/src/update6_0-6_1.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# update +# Script to apply patches to existing databases +# to upgrade from Postgres v6.0 to v6.1. +echo "" +echo "This utility does a minimal upgrade for existing v6.0 databases." +echo "Note that several new features and functions in Postgres" +echo " will not be available unless the databases are reloaded" +echo " from a clean install of v6.1." +echo "" +echo "This update script is not necessary for new or reloaded databases," +echo " but will not damage them. You should update existing v6.1beta" +echo " databases created on or before 1997-04-04 (when the patches for" +echo " aggregate functions were applied to the v6.1beta source tree)." +echo "" +echo "Features present with this simple update include:" +echo " - aggregate operators sum() and avg() behave correctly with NULLs" +echo " - the point distance operator '<===>' returns float8 rather than int4" +echo " - some duplicate function OIDs are renumbered to eliminate conflicts" +echo "" +echo "Features unavailable with only this simple update include:" +echo " - new string handling functions a la Oracle/Ingres" +echo " - new date and time data types and expanded functionality" +echo " - some new function overloading to simplify function names" +echo "" +echo "Note that if v6.0 databases are not reloaded from a clean install of v6.1" +echo " or if this update is not applied to existing v6.0 databases:" +echo " - aggregate functions avg() and sum() may divide-by-zero for int4 data types" +# +srcdir=`pwd` +srcsql="update6_0-6_1.sql" +CMDSQL="psql" +SRCSQL="$srcdir/$srcsql" +# +if [ -z $SRCSQL ]; then + echo "unable to locate $SRCSQL" + exit 1 +fi +# +echo "" +echo "updating databases found in $PGDATA/base" +echo "" +# +cd $PGDATA/base +for d in * +do + echo "updating $d at `date` ..." + echo "try $CMDSQL $d < $SRCSQL" + $CMDSQL $d < $SRCSQL + echo "completed updating $d at `date`" +done +# +echo "" +echo "completed all updates at `date`" +echo "" +exit diff --git a/src/update6_0-6_1.sql b/src/update6_0-6_1.sql new file mode 100644 index 0000000000..e60e8d93cb --- /dev/null +++ b/src/update6_0-6_1.sql @@ -0,0 +1,47 @@ +-- Aggregate functions +-- Thomas Lockhart +-- This fixes the behavior of built-in aggregate functions avg() and sum(). +-- Code tested on postgres95 v1.0.9, postgres v6.0, and postgres v6.1b-970315. +-- Original definitions return zero rather than null for empty set attributes. +-- Postgres source code says that null behavior for aggregates is not correct, +-- but does describe the correct behavior for pass-by-reference data types +-- if it is given null initial values (from pg_aggregate). +-- Note that pass-by-value data types (e.g. int4) require a simple source code +-- change in backend/executor/nodeAgg.c to avoid divide-by-zero results. +-- If this SQL update is applied without making the corresponding source code +-- patch, then floating point types will work correctly but integer types will +-- divide-by-zero. +-- If the source code patch is applied but this SQL update is not, then there +-- will be divide-by-zero results for floating point types. + +-- For aggregate attributes, the correct behavior is as follows: +-- count(*) should return a count of all tuples, null or otherwise +-- count(col) should return a count of all non-null values, zero if none +-- avg(col), sum(col), etc should ignore null fields and return null if there +-- are no non-null inputs +-- Ref: the original Date book + +update pg_aggregate set agginitval1=null + where aggname = 'avg' or aggname = 'sum'; + +-- Geometric functions +-- Thomas Lockhart +-- This replaces the distance operator with one returning a floating point number. +-- The original operator 'pointdist' returned an integer. +-- There is no corresponding source code change required for this patch. + +update pg_operator set oprresult = 701, oprcode = 'point_distance'::regproc + where oprname = '<===>' and oprresult = 23; + +-- Date functions +-- Thomas Lockhart +-- This fixes conflicting OIDs within the date and time declarations. + +update pg_proc set oid = 1138::oid where proname = 'date_larger'; +update pg_proc set oid = 1139::oid where proname = 'date_smaller'; +update pg_proc set oid = 1140::oid where proname = 'date_mi'; +update pg_proc set oid = 1141::oid where proname = 'date_pli'; +update pg_proc set oid = 1142::oid where proname = 'date_mii'; +update pg_proc set oid = 1143::oid where proname = 'timein'; +update pg_proc set oid = 1144::oid where proname = 'timeout'; +update pg_proc set oid = 1145::oid where proname = 'time_eq';