From 6fbb14a17432681f80a98f64cc810a7871a0a757 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 21 Oct 2003 22:51:14 +0000 Subject: [PATCH] Update the complex-datatype example to V1 function calling conventions, and add binary send/receive functions. Fix some other grottiness such as failure to mark the C functions STRICT. --- doc/src/sgml/xoper.sgml | 19 ++- doc/src/sgml/xtypes.sgml | 150 +++++++++++++++------ src/tutorial/complex.c | 251 ++++++++++++++++++++++-------------- src/tutorial/complex.source | 63 ++++++--- 4 files changed, 329 insertions(+), 154 deletions(-) diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml index ab5f38be75..80e6793397 100644 --- a/doc/src/sgml/xoper.sgml +++ b/doc/src/sgml/xoper.sgml @@ -1,5 +1,5 @@ @@ -120,6 +120,23 @@ SELECT (a + b) AS c FROM test_complex; be provided in the COMMUTATOR clause. + + It's critical to provide commutator information for operators that + will be used in indexes and join clauses, because this allows the + query optimizer to flip around such a clause to the forms + needed for different plan types. For example, consider a query with + a WHERE clause like tab1.x = tab2.y, where tab1.x + and tab2.y are of a user-defined type, and suppose that + tab2.y is indexed. The optimizer cannot generate an + indexscan unless it can determine how to flip the clause around to + tab2.y = tab1.x, because the indexscan machinery expects + to see the indexed column on the left of the operator it is given. + PostgreSQL will not simply + assume that this is a valid transformation --- the definer of the + = operator must specify that it is valid, by marking the + operator with commutator information. + + When you are defining a self-commutative operator, you just do it. When you are defining a pair of commutative operators, things are diff --git a/doc/src/sgml/xtypes.sgml b/doc/src/sgml/xtypes.sgml index 11981a5b19..afd35ab700 100644 --- a/doc/src/sgml/xtypes.sgml +++ b/doc/src/sgml/xtypes.sgml @@ -1,5 +1,5 @@ @@ -10,21 +10,21 @@ $Header: /cvsroot/pgsql/doc/src/sgml/xtypes.sgml,v 1.20 2003/08/31 17:32:21 pete user-defined - - This section needs to be updated for the version-1 function manager - interface. - - - As described above, there are two kinds of data types in - PostgreSQL: base types and composite - types. This section describes how to define new base types. + As described in , + PostgreSQL can be extended to support new + data types. This section describes how to define new base types, + which are data types defined below the level of the SQL + language. Creating a new base type requires implementing functions + to operate on the type in a low-level language, usually C. The examples in this section can be found in complex.sql and complex.c - in the tutorial directory. + in the src/tutorial directory of the source distribution. + See the README file in that directory for instructions + about running the examples. @@ -44,12 +44,14 @@ $Header: /cvsroot/pgsql/doc/src/sgml/xtypes.sgml,v 1.20 2003/08/31 17:32:21 pete as its argument and returns the internal (in memory) representation of the type. The output function takes the internal representation of the type as argument and returns a null-terminated character - string. + string. If we want to do anything more with the type than merely + store it, we must provide additional functions to implement whatever + operations we'd like to have for the type. Suppose we want to define a type complex that represents - complex numbers. A natural way to to represent a complex number in + complex numbers. A natural way to represent a complex number in memory would be the following C structure: @@ -59,6 +61,11 @@ typedef struct Complex { } Complex; + We will need to make this a pass-by-reference type, since it's too + large to fit into a single Datum value. + + + As the external string representation of the type, we choose a string of the form (x,y). @@ -71,9 +78,12 @@ typedef struct Complex { input function. For instance: -Complex * -complex_in(char *str) +PG_FUNCTION_INFO_V1(complex_in); + +Datum +complex_in(PG_FUNCTION_ARGS) { + char *str = PG_GETARG_CSTRING(0); double x, y; Complex *result; @@ -81,34 +91,36 @@ complex_in(char *str) if (sscanf(str, " ( %lf , %lf )", &x, &y) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for complex: \"%s\"", str))); + errmsg("invalid input syntax for complex: \"%s\"", + str))); result = (Complex *) palloc(sizeof(Complex)); result->x = x; result->y = y; - return result; + PG_RETURN_POINTER(result); } The output function can simply be: -char * -complex_out(Complex *complex) -{ - char *result; +PG_FUNCTION_INFO_V1(complex_out); - if (complex == NULL) - return(NULL); - result = (char *) palloc(60); - sprintf(result, "(%g,%g)", complex->x, complex->y); - return result; +Datum +complex_out(PG_FUNCTION_ARGS) +{ + Complex *complex = (Complex *) PG_GETARG_POINTER(0); + char *result; + + result = (char *) palloc(100); + snprintf(result, 100, "(%g,%g)", complex->x, complex->y); + PG_RETURN_CSTRING(result); } - You should try to make the input and output functions inverses of + You should be careful to make the input and output functions inverses of each other. If you do not, you will have severe problems when you need to dump your data into a file and then read it back in. This is a particularly common problem when floating-point numbers are @@ -116,34 +128,87 @@ complex_out(Complex *complex) - To define the complex type, we need to create the two - user-defined functions complex_in and - complex_out before creating the type: + Optionally, a user-defined type can provide binary input and output + routines. Binary I/O is normally faster but less portable than textual + I/O. As with textual I/O, it is up to you to define exactly what the + external binary representation is. Most of the built-in datatypes + try to provide a machine-independent binary representation. For + complex, we will piggy-back on the binary I/O converters + for type float8: + + +PG_FUNCTION_INFO_V1(complex_recv); + +Datum +complex_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Complex *result; + + result = (Complex *) palloc(sizeof(Complex)); + result->x = pq_getmsgfloat8(buf); + result->y = pq_getmsgfloat8(buf); + PG_RETURN_POINTER(result); +} + +PG_FUNCTION_INFO_V1(complex_send); + +Datum +complex_send(PG_FUNCTION_ARGS) +{ + Complex *complex = (Complex *) PG_GETARG_POINTER(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, complex->x); + pq_sendfloat8(&buf, complex->y); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); +} + + + + + To define the complex type, we need to create the + user-defined I/O functions before creating the type: CREATE FUNCTION complex_in(cstring) RETURNS complex AS 'filename' - LANGUAGE C; + LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION complex_out(complex) RETURNS cstring AS 'filename' - LANGUAGE C; + LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION complex_recv(internal) + RETURNS complex + AS 'filename' + LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION complex_send(complex) + RETURNS bytea + AS 'filename' + LANGUAGE C IMMUTABLE STRICT; Notice that the declarations of the input and output functions must reference the not-yet-defined type. This is allowed, but will draw - warning messages that may be ignored. + warning messages that may be ignored. The input function must + appear first. Finally, we can declare the data type: CREATE TYPE complex ( - internallength = 16, - input = complex_in, - output = complex_out + internallength = 16, + input = complex_in, + output = complex_out, + receive = complex_recv, + send = complex_send, + alignment = double ); @@ -158,14 +223,25 @@ CREATE TYPE complex ( (_) prepended. + + Once the data type exists, we can declare additional functions to + provide useful operations on the data type. Operators can then be + defined atop the functions, and if needed, operator classes can be + created to support indexing of the data type. These additional + layers are discussed in following sections. + + If the values of your data type might exceed a few hundred bytes in - size (in internal form), you should mark them + size (in internal form), you should make the data type TOAST-able.TOASTand user-defined types To do this, the internal representation must follow the standard layout for variable-length data: the first four bytes must be an int32 containing - the total length in bytes of the datum (including itself). Also, + the total length in bytes of the datum (including itself). The C + functions operating on the data type must be careful to unpack any + toasted values they are handed (this detail can normally be hidden in the + GETARG macros). Then, when running the CREATE TYPE command, specify the internal length as variable and select the appropriate storage option. diff --git a/src/tutorial/complex.c b/src/tutorial/complex.c index 5e8f75ae32..38aafd9ff5 100644 --- a/src/tutorial/complex.c +++ b/src/tutorial/complex.c @@ -6,33 +6,44 @@ #include "postgres.h" +#include "fmgr.h" +#include "libpq/pqformat.h" /* needed for send/recv functions */ + + typedef struct Complex { double x; double y; } Complex; -/* These prototypes declare the requirements that Postgres places on these - user written functions. -*/ -Complex *complex_in(char *str); -char *complex_out(Complex * complex); -Complex *complex_add(Complex * a, Complex * b); -bool complex_abs_lt(Complex * a, Complex * b); -bool complex_abs_le(Complex * a, Complex * b); -bool complex_abs_eq(Complex * a, Complex * b); -bool complex_abs_ge(Complex * a, Complex * b); -bool complex_abs_gt(Complex * a, Complex * b); -int4 complex_abs_cmp(Complex * a, Complex * b); +/* + * Since we use V1 function calling convention, all these functions have + * the same signature as far as C is concerned. We provide these prototypes + * just to forestall warnings when compiled with gcc -Wmissing-prototypes. + */ +Datum complex_in(PG_FUNCTION_ARGS); +Datum complex_out(PG_FUNCTION_ARGS); +Datum complex_recv(PG_FUNCTION_ARGS); +Datum complex_send(PG_FUNCTION_ARGS); +Datum complex_add(PG_FUNCTION_ARGS); +Datum complex_abs_lt(PG_FUNCTION_ARGS); +Datum complex_abs_le(PG_FUNCTION_ARGS); +Datum complex_abs_eq(PG_FUNCTION_ARGS); +Datum complex_abs_ge(PG_FUNCTION_ARGS); +Datum complex_abs_gt(PG_FUNCTION_ARGS); +Datum complex_abs_cmp(PG_FUNCTION_ARGS); /***************************************************************************** * Input/Output functions *****************************************************************************/ -Complex * -complex_in(char *str) +PG_FUNCTION_INFO_V1(complex_in); + +Datum +complex_in(PG_FUNCTION_ARGS) { + char *str = PG_GETARG_CSTRING(0); double x, y; Complex *result; @@ -40,133 +51,173 @@ complex_in(char *str) if (sscanf(str, " ( %lf , %lf )", &x, &y) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for complex: \"%s\"", str))); + errmsg("invalid input syntax for complex: \"%s\"", + str))); result = (Complex *) palloc(sizeof(Complex)); result->x = x; result->y = y; - return result; + PG_RETURN_POINTER(result); } -char * -complex_out(Complex * complex) -{ - char *result; +PG_FUNCTION_INFO_V1(complex_out); - if (complex == NULL) - return NULL; +Datum +complex_out(PG_FUNCTION_ARGS) +{ + Complex *complex = (Complex *) PG_GETARG_POINTER(0); + char *result; result = (char *) palloc(100); snprintf(result, 100, "(%g,%g)", complex->x, complex->y); - return result; + PG_RETURN_CSTRING(result); +} + +/***************************************************************************** + * Binary Input/Output functions + * + * These are optional. + *****************************************************************************/ + +PG_FUNCTION_INFO_V1(complex_recv); + +Datum +complex_recv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + Complex *result; + + result = (Complex *) palloc(sizeof(Complex)); + result->x = pq_getmsgfloat8(buf); + result->y = pq_getmsgfloat8(buf); + PG_RETURN_POINTER(result); +} + +PG_FUNCTION_INFO_V1(complex_send); + +Datum +complex_send(PG_FUNCTION_ARGS) +{ + Complex *complex = (Complex *) PG_GETARG_POINTER(0); + StringInfoData buf; + + pq_begintypsend(&buf); + pq_sendfloat8(&buf, complex->x); + pq_sendfloat8(&buf, complex->y); + PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } /***************************************************************************** * New Operators + * + * A practical Complex datatype would provide much more than this, of course. *****************************************************************************/ -Complex * -complex_add(Complex * a, Complex * b) +PG_FUNCTION_INFO_V1(complex_add); + +Datum +complex_add(PG_FUNCTION_ARGS) { + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); Complex *result; result = (Complex *) palloc(sizeof(Complex)); result->x = a->x + b->x; result->y = a->y + b->y; - return result; + PG_RETURN_POINTER(result); } /***************************************************************************** * Operator class for defining B-tree index + * + * It's essential that the comparison operators and support function for a + * B-tree index opclass always agree on the relative ordering of any two + * data values. Experience has shown that it's depressingly easy to write + * unintentionally inconsistent functions. One way to reduce the odds of + * making a mistake is to make all the functions simple wrappers around + * an internal three-way-comparison function, as we do here. *****************************************************************************/ #define Mag(c) ((c)->x*(c)->x + (c)->y*(c)->y) -bool -complex_abs_lt(Complex * a, Complex * b) -{ - double amag = Mag(a), - bmag = Mag(b); - - return amag < bmag; -} - -bool -complex_abs_le(Complex * a, Complex * b) -{ - double amag = Mag(a), - bmag = Mag(b); - - return amag <= bmag; -} - -bool -complex_abs_eq(Complex * a, Complex * b) -{ - double amag = Mag(a), - bmag = Mag(b); - - return amag == bmag; -} - -bool -complex_abs_ge(Complex * a, Complex * b) -{ - double amag = Mag(a), - bmag = Mag(b); - - return amag >= bmag; -} - -bool -complex_abs_gt(Complex * a, Complex * b) -{ - double amag = Mag(a), - bmag = Mag(b); - - return amag > bmag; -} - -int4 -complex_abs_cmp(Complex * a, Complex * b) +static int +complex_abs_cmp_internal(Complex *a, Complex *b) { double amag = Mag(a), bmag = Mag(b); if (amag < bmag) return -1; - else if (amag > bmag) + if (amag > bmag) return 1; - else - return 0; + return 0; } -/***************************************************************************** - * test code - *****************************************************************************/ -/* - * You should always test your code separately. Trust me, using POSTGRES to - * debug your C function will be very painful and unproductive. In case of - * POSTGRES crashing, it is impossible to tell whether the bug is in your - * code or POSTGRES's. - */ -void test_main(void); -void -test_main() +PG_FUNCTION_INFO_V1(complex_abs_lt); + +Datum +complex_abs_lt(PG_FUNCTION_ARGS) { - Complex *a; - Complex *b; + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); - a = complex_in("(4.01, 3.77 )"); - printf("a = %s\n", complex_out(a)); - b = complex_in("(1.0,2.0)"); - printf("b = %s\n", complex_out(b)); - printf("a + b = %s\n", complex_out(complex_add(a, b))); - printf("a < b = %d\n", complex_abs_lt(a, b)); - printf("a <= b = %d\n", complex_abs_le(a, b)); - printf("a = b = %d\n", complex_abs_eq(a, b)); - printf("a >= b = %d\n", complex_abs_ge(a, b)); - printf("a > b = %d\n", complex_abs_gt(a, b)); + PG_RETURN_BOOL(complex_abs_cmp_internal(a, b) < 0); +} + +PG_FUNCTION_INFO_V1(complex_abs_le); + +Datum +complex_abs_le(PG_FUNCTION_ARGS) +{ + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); + + PG_RETURN_BOOL(complex_abs_cmp_internal(a, b) <= 0); +} + +PG_FUNCTION_INFO_V1(complex_abs_eq); + +Datum +complex_abs_eq(PG_FUNCTION_ARGS) +{ + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); + + PG_RETURN_BOOL(complex_abs_cmp_internal(a, b) == 0); +} + +PG_FUNCTION_INFO_V1(complex_abs_ge); + +Datum +complex_abs_ge(PG_FUNCTION_ARGS) +{ + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); + + PG_RETURN_BOOL(complex_abs_cmp_internal(a, b) >= 0); +} + +PG_FUNCTION_INFO_V1(complex_abs_gt); + +Datum +complex_abs_gt(PG_FUNCTION_ARGS) +{ + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); + + PG_RETURN_BOOL(complex_abs_cmp_internal(a, b) > 0); +} + +PG_FUNCTION_INFO_V1(complex_abs_cmp); + +Datum +complex_abs_cmp(PG_FUNCTION_ARGS) +{ + Complex *a = (Complex *) PG_GETARG_POINTER(0); + Complex *b = (Complex *) PG_GETARG_POINTER(1); + + PG_RETURN_INT32(complex_abs_cmp_internal(a, b)); } diff --git a/src/tutorial/complex.source b/src/tutorial/complex.source index db33d9100e..9050e82a59 100644 --- a/src/tutorial/complex.source +++ b/src/tutorial/complex.source @@ -8,20 +8,25 @@ -- Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group -- Portions Copyright (c) 1994, Regents of the University of California -- --- $Header: /cvsroot/pgsql/src/tutorial/complex.source,v 1.15 2003/08/04 23:59:41 tgl Exp $ +-- $Header: /cvsroot/pgsql/src/tutorial/complex.source,v 1.16 2003/10/21 22:51:14 tgl Exp $ -- --------------------------------------------------------------------------- ----------------------------- -- Creating a new type: --- a user-defined type must have an input and an output function. They --- are user-defined C functions. We are going to create a new type --- called 'complex' which represents complex numbers. +-- We are going to create a new type called 'complex' which represents +-- complex numbers. +-- A user-defined type must have an input and an output function, and +-- optionally can have binary input and output functions. All of these +-- are usually user-defined C functions. ----------------------------- -- Assume the user defined functions are in _OBJWD_/complex$DLSUFFIX --- (we do not want to assume this is in the dynamic loader search path) --- Look at $PWD/complex.c for the source. +-- (we do not want to assume this is in the dynamic loader search path). +-- Look at $PWD/complex.c for the source. Note that we declare all of +-- them as STRICT, so we do not need to cope with NULL inputs in the +-- C code. We also mark them IMMUTABLE, since they always return the +-- same outputs given the same inputs. -- the input function 'complex_in' takes a null-terminated string (the -- textual representation of the type) and turns it into the internal @@ -31,7 +36,7 @@ CREATE FUNCTION complex_in(cstring) RETURNS complex AS '_OBJWD_/complex' - LANGUAGE 'c'; + LANGUAGE C IMMUTABLE STRICT; -- the output function 'complex_out' takes the internal representation and -- converts it into the textual representation. @@ -39,7 +44,24 @@ CREATE FUNCTION complex_in(cstring) CREATE FUNCTION complex_out(complex) RETURNS cstring AS '_OBJWD_/complex' - LANGUAGE 'c'; + LANGUAGE C IMMUTABLE STRICT; + +-- the binary input function 'complex_recv' takes a StringInfo buffer +-- and turns its contents into the internal representation. + +CREATE FUNCTION complex_recv(internal) + RETURNS complex + AS '_OBJWD_/complex' + LANGUAGE C IMMUTABLE STRICT; + +-- the binary output function 'complex_send' takes the internal representation +-- and converts it into a (hopefully) platform-independent bytea string. + +CREATE FUNCTION complex_send(complex) + RETURNS bytea + AS '_OBJWD_/complex' + LANGUAGE C IMMUTABLE STRICT; + -- now, we can create the type. The internallength specifies the size of the -- memory block required to hold the type (we need two 8-byte doubles). @@ -48,6 +70,8 @@ CREATE TYPE complex ( internallength = 16, input = complex_in, output = complex_out, + receive = complex_recv, + send = complex_send, alignment = double ); @@ -57,7 +81,7 @@ CREATE TYPE complex ( -- user-defined types can be used like ordinary built-in types. ----------------------------- --- eg. we can use it in a schema +-- eg. we can use it in a table CREATE TABLE test_complex ( a complex, @@ -84,7 +108,7 @@ SELECT * FROM test_complex; CREATE FUNCTION complex_add(complex, complex) RETURNS complex AS '_OBJWD_/complex' - LANGUAGE 'c'; + LANGUAGE C IMMUTABLE STRICT; -- we can now define the operator. We show a binary operator here but you -- can also define unary operators by omitting either of leftarg or rightarg. @@ -132,40 +156,47 @@ SELECT complex_sum(a) FROM test_complex; -- first, define the required operators CREATE FUNCTION complex_abs_lt(complex, complex) RETURNS bool - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION complex_abs_le(complex, complex) RETURNS bool - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION complex_abs_eq(complex, complex) RETURNS bool - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION complex_abs_ge(complex, complex) RETURNS bool - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; CREATE FUNCTION complex_abs_gt(complex, complex) RETURNS bool - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; CREATE OPERATOR < ( leftarg = complex, rightarg = complex, procedure = complex_abs_lt, + commutator = > , negator = >= , restrict = scalarltsel, join = scalarltjoinsel ); CREATE OPERATOR <= ( leftarg = complex, rightarg = complex, procedure = complex_abs_le, + commutator = >= , negator = > , restrict = scalarltsel, join = scalarltjoinsel ); CREATE OPERATOR = ( leftarg = complex, rightarg = complex, procedure = complex_abs_eq, + commutator = = , + -- leave out negator since we didn't create <> operator + -- negator = <> , restrict = eqsel, join = eqjoinsel ); CREATE OPERATOR >= ( leftarg = complex, rightarg = complex, procedure = complex_abs_ge, + commutator = <= , negator = < , restrict = scalargtsel, join = scalargtjoinsel ); CREATE OPERATOR > ( leftarg = complex, rightarg = complex, procedure = complex_abs_gt, + commutator = < , negator = <= , restrict = scalargtsel, join = scalargtjoinsel ); -- create the support function too CREATE FUNCTION complex_abs_cmp(complex, complex) RETURNS int4 - AS '_OBJWD_/complex' LANGUAGE 'c'; + AS '_OBJWD_/complex' LANGUAGE C IMMUTABLE STRICT; -- now we can make the operator class CREATE OPERATOR CLASS complex_abs_ops