Gene Selkov's SEG datatype (GiST example code)

This commit is contained in:
Tom Lane 2000-12-11 20:40:33 +00:00
parent 9892ddf5ee
commit a3694b420f
14 changed files with 6076 additions and 0 deletions

83
contrib/seg/Makefile Normal file
View File

@ -0,0 +1,83 @@
#
# $Header: /cvsroot/pgsql/contrib/seg/Makefile,v 1.1 2000/12/11 20:40:33 tgl Exp $
#
# Build description for the "seg" contrib module: a shared library plus the
# SQL script that registers the type.
#
# Location of this directory relative to the top of the build tree; both are
# set before Makefile.global is included.
subdir = contrib/seg
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
# override libdir to install shlib in contrib not main directory
libdir := $(libdir)/contrib
# shared library parameters
# (NAME and the SO_* versions are presumably consumed by Makefile.shlib,
# which is included below -- confirm there)
NAME= seg
SO_MAJOR_VERSION= 1
SO_MINOR_VERSION= 0
# 'override' makes the -I flag stick even if CPPFLAGS is given on the
# command line.
override CPPFLAGS += -I$(srcdir)
# segparse.o and segscan.o are compiled from the generated segparse.c and
# segscan.c (see the yacc and flex rules below).
OBJS= seg.o segparse.o segscan.o buffer.o
# Default goal: the shared library (all-lib is not defined in this file;
# presumably it comes from Makefile.shlib) and the generated SQL script.
all: all-lib $(NAME).sql
# Shared library stuff
include $(top_srcdir)/src/Makefile.shlib
# One $(YACC) run writes both segparse.c and segparse.h.  Naming both files
# as targets of a single rule would only declare two independent rules, so
# a parallel make could run the recipe twice at once and race on
# y.tab.c/y.tab.h.  Chain the header off the C file instead.  The trailing
# semicolon gives the header rule an explicit empty recipe so make does not
# search for an implicit one.
# (Limitation of this idiom: if segparse.h alone is deleted while
# segparse.c is up to date, remove segparse.c too to force regeneration.)
segparse.h: segparse.c ;

segparse.c: segparse.y
	$(YACC) -d $(YFLAGS) -p seg_yy $<
	mv -f y.tab.c segparse.c
	mv -f y.tab.h segparse.h
# Generate the scanner.  -Pseg_yy changes flex's symbol prefix to seg_yy,
# matching the -p seg_yy prefix given to yacc for the parser.
# When FLEX is not set, fall back to $(missing), which is not defined in
# this file (presumably the "missing" helper set up by Makefile.global --
# confirm).
segscan.c: segscan.l
ifdef FLEX
$(FLEX) $(FLEXFLAGS) -Pseg_yy -o'$@' $<
else
@$(missing) flex $< $@
endif
# Produce the installable SQL script by substituting the installed path of
# the shared library for the MODULE_PATHNAME placeholder.
$(NAME).sql: $(NAME).sql.in
sed -e 's:MODULE_PATHNAME:$(libdir)/$(shlib):g' < $< > $@
# Make sure the regression driver (pg_regress) is built before it is used.
.PHONY: submake
submake:
$(MAKE) -C $(top_builddir)/src/test/regress pg_regress
# against installed postmaster
# Runs the "seg" test through pg_regress.
installcheck: submake
$(top_builddir)/src/test/regress/pg_regress seg
# in-tree test doesn't work yet (no way to install my shared library)
#check: all submake
# $(top_builddir)/src/test/regress/pg_regress --temp-install \
# --top-builddir=$(top_builddir) seg
# Until the in-tree test works, 'check' only prints a pointer to
# 'installcheck'.
check:
@echo "'make check' is not supported."
@echo "Do 'make install', then 'make installcheck' instead."
# Install the shared library (install-lib is not defined in this file;
# presumably Makefile.shlib provides it), the README and the generated SQL
# script.
install: all installdirs install-lib
$(INSTALL_DATA) $(srcdir)/README.$(NAME) $(docdir)/contrib
$(INSTALL_DATA) $(NAME).sql $(datadir)/contrib
# Create every directory that 'install' writes into.  $(libdir) here is the
# contrib-specific value overridden near the top of this file.
installdirs:
$(mkinstalldirs) $(docdir)/contrib $(datadir)/contrib $(libdir)
# Remove what 'install' copied; the library itself is handled by
# uninstall-lib.
uninstall: uninstall-lib
rm -f $(docdir)/contrib/README.$(NAME) $(datadir)/contrib/$(NAME).sql
# All three cleaning levels do the same thing here: remove the generated
# parser and scanner sources, yacc leftovers, objects, the generated SQL
# script and regression-test output.
# NOTE(review): the 'depend' file written by the depend/dep target is not
# removed by any of these targets -- confirm whether that is intended.
clean distclean maintainer-clean: clean-lib
rm -f segparse.c segparse.h segscan.c
rm -f y.tab.c y.tab.h $(OBJS) $(NAME).sql
# things created by various check targets
rm -rf results tmp_check log
rm -f regression.diffs regression.out regress.out run_check.out
# regress.def is only cleaned up when PORTNAME is "win".
ifeq ($(PORTNAME), win)
rm -f regress.def
endif
# Regenerate the compiler-derived dependency list for every C file present
# in this directory.  $(CPPFLAGS) has to be passed along with $(CFLAGS):
# it carries the -I search paths (this file adds -I$(srcdir) to it), and
# without them -MM cannot locate the headers it is supposed to list.
depend dep:
	$(CC) -MM $(CPPFLAGS) $(CFLAGS) *.c >depend

# Pull the list in only once it has been generated.
ifeq (depend,$(wildcard depend))
include depend
endif

326
contrib/seg/README.seg Normal file
View File

@ -0,0 +1,326 @@
This directory contains the code for the user-defined type,
SEG, representing laboratory measurements as floating point
intervals.
RATIONALE
=========
The geometry of measurements is usually more complex than that of a
point in a numeric continuum. A measurement is usually a segment of
that continuum with somewhat fuzzy limits. The measurements come out
as intervals because of uncertainty and randomness, as well as because
the value being measured may naturally be an interval indicating some
condition, such as the temperature range of stability of a protein.
Using just common sense, it appears more convenient to store such data
as intervals, rather than pairs of numbers. In practice, it even turns
out more efficient in most applications.
Further along the line of common sense, the fuzziness of the limits
suggests that the use of traditional numeric data types leads to a
certain loss of information. Consider this: your instrument reads
6.50, and you input this reading into the database. What do you get
when you fetch it? Watch:
test=> select 6.50 as "pH";
pH
---
6.5
(1 row)
In the world of measurements, 6.50 is not the same as 6.5. It may
sometimes be critically different. The experimenters usually write
down (and publish) the digits they trust. 6.50 is actually a fuzzy
interval contained within a bigger and even fuzzier interval, 6.5,
with their center points being (probably) the only common feature they
share. We definitely do not want such different data items to appear the
same.
Conclusion? It is nice to have a special data type that can record the
limits of an interval with arbitrarily variable precision. Variable in
a sense that each data element records its own precision.
Check this out:
test=> select '6.25 .. 6.50'::seg as "pH";
pH
------------
6.25 .. 6.50
(1 row)
FILES
=====
Makefile building instructions for the shared library
README.seg the file you are now reading
buffer.c global variables and buffer access utilities
shared between the parser (segparse.y) and the
scanner (segscan.l)
buffer.h function prototypes for buffer.c
seg.c the implementation of this data type in c
seg.sql.in SQL code needed to register this type with postgres
(transformed to seg.sql by make)
segdata.h the data structure used to store the segments
segparse.y the grammar file for the parser (used by seg_in() in seg.c)
segscan.l scanner rules (used by seg_yyparse() in segparse.y)
seg-validate.pl a simple input validation script. It is probably a
little stricter than the type itself: for example,
it rejects '22 ' because of the trailing space. Use
as a filter to discard bad values from a single column;
redirect to /dev/null to see the offending input
sort-segments.pl a script to sort the tables having a SEG type column
INSTALLATION
============
To install the type, run
make
make install
For this to work, make sure that:
. the seg source directory is in the postgres contrib directory
. the user running "make install" has postgres administrative authority
. this user's environment defines the PGLIB and PGDATA variables and has
postgres binaries in the PATH.
This only installs the type implementation and documentation. To make the
type available in any particular database, do
psql -d databasename < seg.sql
If you install the type in the template1 database, all subsequently created
databases will inherit it.
To test the new type, after "make install" do
make installcheck
If it fails, examine the file regression.diffs to find out the reason (the
test code is a direct adaptation of the regression tests from the main
source tree).
SYNTAX
======
The external representation of an interval is formed using one or two
floating point numbers joined by the range operator ('..' or '...').
Optional certainty indicators (<, > and ~) are ignored by the internal
logics, but are retained in the data.
Grammar
-------
rule 1 seg -> boundary PLUMIN deviation
rule 2 seg -> boundary RANGE boundary
rule 3 seg -> boundary RANGE
rule 4 seg -> RANGE boundary
rule 5 seg -> boundary
rule 6 boundary -> FLOAT
rule 7 boundary -> EXTENSION FLOAT
rule 8 deviation -> FLOAT
Tokens
------
RANGE (\.\.)(\.)?
PLUMIN (\'\+\-\')|(\(\+\-\))
integer [+-]?[0-9]+
real [+-]?[0-9]+\.[0-9]+
FLOAT ({integer}|{real})([eE]{integer})?
EXTENSION [<>~]
Examples of valid SEG representations:
--------------------------------------
Any number (rules 5,6) -- creates a zero-length segment (a point,
if you will)
~5.0 (rules 5,7) -- creates a zero-length segment AND records
'~' in the data. This notation reads 'approximately 5.0',
but its meaning is not recognized by the code. It is ignored
until you get the value back. View it as a short-hand comment.
<5.0 (rules 5,7) -- creates a point at 5.0; '<' is ignored but
is preserved as a comment
>5.0 (rules 5,7) -- creates a point at 5.0; '>' is ignored but
is preserved as a comment
5(+-)0.3
5'+-'0.3 (rules 1,8) -- creates an interval '4.7..5.3'. As of this
writing (02/09/2000), this mechanism isn't completely accurate
in determining the number of significant digits for the
boundaries. For example, it adds an extra digit to the lower
boundary if the resulting interval includes a power of ten:
template1=> select '10(+-)1'::seg as seg;
seg
---------
9.0 .. 11 -- should be: 9 .. 11
Also, the (+-) notation is not preserved: 'a(+-)b' will
always be returned as '(a-b) .. (a+b)'. The purpose of this
notation is to allow input from certain data sources without
conversion.
50 .. (rule 3) -- everything that is greater than or equal to 50
.. 0 (rule 4) -- everything that is less than or equal to 0
1.5e-2 .. 2E-2 (rule 2) -- creates an interval (0.015 .. 0.02)
1 ... 2 The same as 1...2, or 1 .. 2, or 1..2 (space is ignored).
Because of the widespread use of '...' in the data sources,
I decided to stick to it as a range operator. This, and
also the fact that the white space around the range operator
is ignored, creates a parsing conflict with numeric constants
starting with a decimal point.
Examples of invalid SEG input:
------------------------------
.1e7 should be: 0.1e7
.1 .. .2 should be: 0.1 .. 0.2
2.4 E4 should be: 2.4E4
The following, although it is not a syntax error, is disallowed to improve
the sanity of the data:
5 .. 2 should be: 2 .. 5
PRECISION
=========
The segments are stored internally as pairs of 32-bit floating point
numbers. It means that the numbers with more than 7 significant digits
will be truncated.
The numbers with less than or exactly 7 significant digits retain their
original precision. That is, if your query returns 0.00, you will be
sure that the trailing zeroes are not the artifacts of formatting: they
reflect the precision of the original data. The number of leading
zeroes does not affect precision: the value 0.0067 is considered to
have just 2 significant digits.
USAGE
=====
The access method for SEG is a GiST (gist_seg_ops), which is a
generalization of R-tree. GiSTs allow the postgres implementation of
R-tree, originally encoded to support 2-D geometric types such as
boxes and polygons, to be used with any data type whose data domain
can be partitioned using the concepts of containment, intersection and
equality. In other words, everything that can intersect or contain
its own kind can be indexed with a GiST. That includes, among other
things, all geometric data types, regardless of their dimensionality
(see also contrib/cube).
The operators supported by the GiST access method include:
[a, b] << [c, d] Is left of
The left operand, [a, b], occurs entirely to the left of the
right operand, [c, d], on the axis (-inf, inf). It means,
[a, b] << [c, d] is true if b < c and false otherwise
[a, b] >> [c, d] Is right of
[a, b] occurs entirely to the right of [c, d].
[a, b] >> [c, d] is true if a > d and false otherwise
[a, b] &< [c, d] Over left
The segment [a, b] overlaps the segment [c, d] in such a way
that a <= c <= b and b <= d
[a, b] &> [c, d] Over right
The segment [a, b] overlaps the segment [c, d] in such a way
that a > c and b <= c <= d
[a, b] = [c, d] Same as
The segments [a, b] and [c, d] are identical, that is, a == c
and b == d
[a, b] @ [c, d] Contains
The segment [a, b] contains the segment [c, d], that is,
a <= c and b >= d
[a, b] ~ [c, d] Contained in
The segment [a, b] is contained in [c, d], that is,
a >= c and b <= d
Although the mnemonics of the following operators is questionable, I
preserved them to maintain visual consistency with other geometric
data types defined in Postgres.
Other operators:
[a, b] < [c, d] Less than
[a, b] > [c, d] Greater than
These operators do not make a lot of sense for any practical
purpose but sorting. These operators first compare (a) to (c),
and if these are equal, compare (b) to (d). That accounts for
reasonably good sorting in most cases, which is useful if
you want to use ORDER BY with this type
There are a few other potentially useful functions defined in seg.c
that vanished from the schema because I stopped using them. Some of
these were meant to support type casting. Let me know if I was wrong:
I will then add them back to the schema. I would also appreciate
other ideas that would enhance the type and make it more useful.
For examples of usage, see sql/seg.sql
NOTE: The performance of an R-tree index can largely depend on the
order of input values. It may be very helpful to sort the input table
on the SEG column (see the script sort-segments.pl for an example)
CREDITS
=======
My thanks are primarily to Prof. Joe Hellerstein
(http://db.cs.berkeley.edu/~jmh/) for elucidating the gist of the GiST
(http://gist.cs.berkeley.edu/). I am also grateful to all postgres
developers, present and past, for enabling me to create my own
world and live undisturbed in it. And I would like to acknowledge my
gratitude to Argonne Lab and to the U.S. Department of Energy for the
years of faithful support of my database research.
------------------------------------------------------------------------
Gene Selkov, Jr.
Computational Scientist
Mathematics and Computer Science Division
Argonne National Laboratory
9700 S Cass Ave.
Building 221
Argonne, IL 60439-4844
selkovjr@mcs.anl.gov

79
contrib/seg/buffer.c Normal file
View File

@ -0,0 +1,79 @@
/* This module defines the parse buffer and routines for setting/reading it */
#include "postgres.h"
#include "utils/elog.h"
static char * PARSE_BUFFER;
static char * PARSE_BUFFER_PTR;
static unsigned int PARSE_BUFFER_SIZE;
static unsigned int SCANNER_POS;
void set_parse_buffer( char* s );
void reset_parse_buffer( void );
int read_parse_buffer( void );
char * parse_buffer( void );
char * parse_buffer_ptr( void );
unsigned int parse_buffer_curr_char( void );
unsigned int parse_buffer_size( void );
unsigned int parse_buffer_pos( void );
extern void seg_flush_scanner_buffer(void); /* defined in segscan.l */
/*
 * set_parse_buffer
 *
 * Point the parse buffer at a new NUL-terminated input string and rewind
 * the read position to its start.  The string is not copied; only the
 * pointer is stored, so the caller must keep it valid while it is being
 * parsed.
 *
 * A NULL or empty string is reported through elog(ERROR).  The check is
 * made before any module state is touched, so a rejected call does not
 * leave the buffer pointer, size and position out of step with each other.
 * (elog(ERROR) is expected not to return to the caller.)
 */
void set_parse_buffer( char* s )
{
	if ( s == NULL || *s == '\0' ) {
		elog(ERROR, "seg_in: can't parse an empty string");
	}
	PARSE_BUFFER = s;
	PARSE_BUFFER_SIZE = strlen(s);
	PARSE_BUFFER_PTR = PARSE_BUFFER;
	SCANNER_POS = 0;
}
/*
 * reset_parse_buffer
 *
 * Rewind the read position to the start of the current input string and
 * have the scanner drop whatever it has buffered.  The buffer pointer and
 * recorded size are left as they are.
 */
void
reset_parse_buffer(void)
{
	SCANNER_POS = 0;
	PARSE_BUFFER_PTR = PARSE_BUFFER;

	/* defined in segscan.l */
	seg_flush_scanner_buffer();
}
/*
 * read_parse_buffer
 *
 * Return the byte at the current read position and advance by one.  The
 * position never moves past the recorded size, so once the end has been
 * reached every further call returns the byte stored at that final
 * position (the string terminator, given that the size was taken with
 * strlen) without advancing.
 */
int
read_parse_buffer(void)
{
	const unsigned int here = SCANNER_POS;

	if (here < PARSE_BUFFER_SIZE)
		SCANNER_POS = here + 1;

	return PARSE_BUFFER[here];
}
/*
 * parse_buffer
 *
 * Return the start of the input string most recently handed to
 * set_parse_buffer().
 */
char *
parse_buffer(void)
{
	return PARSE_BUFFER;
}
/*
 * parse_buffer_curr_char
 *
 * Return the byte at the current read position without advancing.
 *
 * The byte is converted through unsigned char first.  Where plain char is
 * signed, converting it straight to the unsigned int return type would
 * sign-extend bytes >= 0x80 into huge values instead of 128..255.
 */
unsigned int parse_buffer_curr_char( void )
{
	return (unsigned char) PARSE_BUFFER[SCANNER_POS];
}
/*
 * parse_buffer_ptr
 *
 * Return the saved buffer pointer.  Within this file it is only ever set
 * to the start of the input string.
 */
char *
parse_buffer_ptr(void)
{
	return PARSE_BUFFER_PTR;
}

/*
 * parse_buffer_pos
 *
 * Return the current read position, counted in bytes from the start of
 * the input string.
 */
unsigned int
parse_buffer_pos(void)
{
	return SCANNER_POS;
}

/*
 * parse_buffer_size
 *
 * Return the length of the input string as measured by
 * set_parse_buffer().
 */
unsigned int
parse_buffer_size(void)
{
	return PARSE_BUFFER_SIZE;
}

8
contrib/seg/buffer.h Normal file
View File

@ -0,0 +1,8 @@
/*
 * buffer.h
 *
 * Interface to the parse buffer shared by the seg parser and scanner
 * (implemented in buffer.c).
 */
#ifndef SEG_BUFFER_H
#define SEG_BUFFER_H

/* install a new input string / rewind to the start of the current one */
extern void set_parse_buffer( char* s );
extern void reset_parse_buffer( void );

/* fetch the byte at the read position and advance */
extern int read_parse_buffer( void );

/* read-only access to the buffer state */
extern char * parse_buffer( void );
extern char * parse_buffer_ptr( void );
extern unsigned int parse_buffer_curr_char( void );
extern unsigned int parse_buffer_pos( void );
extern unsigned int parse_buffer_size( void );

#endif   /* SEG_BUFFER_H */

File diff suppressed because it is too large Load Diff

1064
contrib/seg/expected/seg.out Normal file

File diff suppressed because it is too large Load Diff

42
contrib/seg/seg-validate.pl Executable file
View File

@ -0,0 +1,42 @@
#!/usr/bin/perl
#
# seg-validate.pl -- a simple input filter for SEG values.
#
# Reads lines from the files named on the command line (or from standard
# input), copies every line that matches one of the five SEG syntax rules
# to standard output unchanged, and reports every other line on standard
# error.
#
# Standard output carries nothing but accepted input lines; an earlier
# debugging statement that printed one of the regular expressions there
# has been removed, because it corrupted the filtered data.

use strict;
use warnings;

# Building blocks, named after the tokens of the SEG grammar.
my $integer   = '[+-]?[0-9]+';
my $real      = '[+-]?[0-9]+\.[0-9]+';
my $RANGE     = '(\.\.)(\.)?';
my $PLUMIN    = q(\'\+\-\');
my $FLOAT     = "(($integer)|($real))([eE]($integer))?";
my $EXTENSION = '<|>|~';

my $boundary  = "($EXTENSION)?$FLOAT";
my $deviation = $FLOAT;

# One anchored pattern per grammar rule, in rule order.
my @rules = map { qr/^($_)$/ } (
	$boundary . $PLUMIN . $deviation,    # rule 1: boundary PLUMIN deviation
	$boundary . $RANGE . $boundary,      # rule 2: boundary RANGE boundary
	$boundary . $RANGE,                  # rule 3: boundary RANGE
	$RANGE . $boundary,                  # rule 4: RANGE boundary
	$boundary,                           # rule 5: boundary
);

while (my $line = <>) {
	# The line keeps its trailing newline; '$' in the patterns matches
	# just before it.  White space is not stripped, so it causes rejection.
	if ( grep { $line =~ $_ } @rules ) {
		print $line;
	}
	else {
		print STDERR "error in $line\n";
	}
}

1049
contrib/seg/seg.c Normal file

File diff suppressed because it is too large Load Diff

361
contrib/seg/seg.sql.in Normal file
View File

@ -0,0 +1,361 @@
-- Create the user-defined type for 1-D floating point intervals (seg)
--
-- The whole script runs as one transaction; the matching END TRANSACTION
-- is the last statement of the file.
BEGIN TRANSACTION;
-- Input and output conversion functions for the type.  MODULE_PATHNAME is
-- a placeholder that the Makefile replaces with the path of the shared
-- library when it generates seg.sql from this file.
CREATE FUNCTION seg_in(opaque)
RETURNS opaque
AS 'MODULE_PATHNAME'
LANGUAGE 'c';
CREATE FUNCTION seg_out(opaque)
RETURNS opaque
AS 'MODULE_PATHNAME'
LANGUAGE 'c';
-- Fixed-length type of 12 bytes.
-- NOTE(review): this is presumably sizeof(SEG) from segdata.h (two floats
-- plus four chars) -- keep the two in step.
CREATE TYPE seg (
internallength = 12,
input = seg_in,
output = seg_out
);
COMMENT ON TYPE seg IS
'floating point interval ''FLOAT .. FLOAT'', ''.. FLOAT'', ''FLOAT ..'' or ''FLOAT''';
--
-- External C-functions for R-tree methods
--
-- Every function in this section is loaded from the shared library named
-- by MODULE_PATHNAME.  The two-argument predicates back the operators
-- created further down in this script.
-- Left/Right methods
CREATE FUNCTION seg_over_left(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_over_left(seg, seg) IS
'is over and left of';
CREATE FUNCTION seg_over_right(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_over_right(seg, seg) IS
'is over and right of';
CREATE FUNCTION seg_left(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_left(seg, seg) IS
'is left of';
CREATE FUNCTION seg_right(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_right(seg, seg) IS
'is right of';
-- Comparison methods
CREATE FUNCTION seg_lt(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_lt(seg, seg) IS
'less than';
CREATE FUNCTION seg_le(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_le(seg, seg) IS
'less than or equal';
CREATE FUNCTION seg_gt(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_gt(seg, seg) IS
'greater than';
CREATE FUNCTION seg_ge(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_ge(seg, seg) IS
'greater than or equal';
-- Containment, overlap and (in)equality predicates
CREATE FUNCTION seg_contains(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_contains(seg, seg) IS
'contains';
CREATE FUNCTION seg_contained(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_contained(seg, seg) IS
'contained in';
CREATE FUNCTION seg_overlap(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_overlap(seg, seg) IS
'overlaps';
CREATE FUNCTION seg_same(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_same(seg, seg) IS
'same as';
CREATE FUNCTION seg_different(seg, seg) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
COMMENT ON FUNCTION seg_different(seg, seg) IS
'different';
-- support routines for indexing
CREATE FUNCTION seg_union(seg, seg) RETURNS seg
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION seg_inter(seg, seg) RETURNS seg
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION seg_size(seg) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
-- miscellaneous
CREATE FUNCTION seg_upper(seg) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION seg_lower(seg) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c';
--
-- OPERATORS
--
-- Ordering operators.  Each names its commutator and negator and borrows
-- the selectivity estimators used for scalar comparisons.
CREATE OPERATOR < (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_lt,
COMMUTATOR = '>', NEGATOR = '>=',
RESTRICT = scalarltsel, JOIN = scalarltjoinsel
);
CREATE OPERATOR <= (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_le,
COMMUTATOR = '>=', NEGATOR = '>',
RESTRICT = scalarltsel, JOIN = scalarltjoinsel
);
CREATE OPERATOR > (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_gt,
COMMUTATOR = '<', NEGATOR = '<=',
RESTRICT = scalargtsel, JOIN = scalargtjoinsel
);
CREATE OPERATOR >= (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_ge,
COMMUTATOR = '<=', NEGATOR = '<',
RESTRICT = scalargtsel, JOIN = scalargtjoinsel
);
-- Positional operators (left of, over left, overlap, over right, right of).
-- These are the operators later registered as GiST strategies 1 through 5.
CREATE OPERATOR << (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_left,
COMMUTATOR = '>>',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR &< (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_over_left,
COMMUTATOR = '&>',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR && (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_overlap,
COMMUTATOR = '&&',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR &> (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_over_right,
COMMUTATOR = '&<',
RESTRICT = positionsel, JOIN = positionjoinsel
);
CREATE OPERATOR >> (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_right,
COMMUTATOR = '<<',
RESTRICT = positionsel, JOIN = positionjoinsel
);
-- Equality and inequality.  '=' also names '<' as the sort operator for
-- both of its operands.
CREATE OPERATOR = (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_same,
COMMUTATOR = '=', NEGATOR = '<>',
RESTRICT = eqsel, JOIN = eqjoinsel,
SORT1 = '<', SORT2 = '<'
);
CREATE OPERATOR <> (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_different,
COMMUTATOR = '<>', NEGATOR = '=',
RESTRICT = neqsel, JOIN = neqjoinsel
);
-- Containment.  Note the binding: '@' calls seg_contains and '~' calls
-- seg_contained; each is declared as the other's commutator.
CREATE OPERATOR @ (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_contains,
COMMUTATOR = '~',
RESTRICT = contsel, JOIN = contjoinsel
);
CREATE OPERATOR ~ (
LEFTARG = seg, RIGHTARG = seg, PROCEDURE = seg_contained,
COMMUTATOR = '@',
RESTRICT = contsel, JOIN = contjoinsel
);
-- define the GiST support methods
-- These seven functions are registered in pg_amproc at the end of this
-- script.  Their C implementations are not part of this file; the
-- signatures below are what the catalog will record.
CREATE FUNCTION gseg_consistent(opaque,seg,int4) RETURNS bool
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_compress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_decompress(opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_penalty(opaque,opaque,opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_picksplit(opaque, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_union(bytea, opaque) RETURNS seg
AS 'MODULE_PATHNAME' LANGUAGE 'c';
CREATE FUNCTION gseg_same(seg, seg, opaque) RETURNS opaque
AS 'MODULE_PATHNAME' LANGUAGE 'c';
-- register the default opclass for indexing
-- (a direct catalog insert: the opclass is named gist_seg_ops and its
-- default type is seg)
INSERT INTO pg_opclass (opcname, opcdeftype)
SELECT 'gist_seg_ops', oid
FROM pg_type
WHERE typname = 'seg';
-- get the comparators for segments and store them in a tmp table
-- (every operator whose left and right operand types are both seg; the
-- table is dropped again once the pg_amop rows have been written)
SELECT o.oid AS opoid, o.oprname
INTO TABLE seg_ops_tmp
FROM pg_operator o, pg_type t
WHERE o.oprleft = t.oid and o.oprright = t.oid
and t.typname = 'seg';
-- make sure we have the right operators
-- SELECT * from seg_ops_tmp;
-- using the tmp table, generate the amop entries
-- Each insert below ties one operator to the 'gist' access method and the
-- gist_seg_ops opclass under a strategy number:
--   1 '<<'   2 '&<'   3 '&&'   4 '&>'   5 '>>'   6 '='   7 '@'   8 '~'
-- seg_left
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 1
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '<<';
-- seg_overleft
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 2
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '&<';
-- seg_overlap
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 3
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '&&';
-- seg_overright
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 4
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '&>';
-- seg_right
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 5
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '>>';
-- seg_same
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 6
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '=';
-- seg_contains
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 7
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '@';
-- seg_contained
INSERT INTO pg_amop (amopid, amopclaid, amopopr, amopstrategy)
SELECT am.oid, opcl.oid, c.opoid, 8
FROM pg_am am, pg_opclass opcl, seg_ops_tmp c
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and c.oprname = '~';
DROP TABLE seg_ops_tmp;
-- add the entries to amproc for the support methods
-- note the amprocnum numbers associated with each are specific!
-- Numbering used below:
--   1 gseg_consistent   2 gseg_union      3 gseg_compress
--   4 gseg_decompress   5 gseg_penalty    6 gseg_picksplit
--   7 gseg_same
-- NOTE(review): functions are looked up by proname alone, so this relies
-- on these names being unique in pg_proc.
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 1
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_consistent';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 2
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_union';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 3
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_compress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 4
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_decompress';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 5
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_penalty';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 6
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_picksplit';
INSERT INTO pg_amproc (amid, amopclaid, amproc, amprocnum)
SELECT am.oid, opcl.oid, pro.oid, 7
FROM pg_am am, pg_opclass opcl, pg_proc pro
WHERE amname = 'gist' and opcname = 'gist_seg_ops'
and proname = 'gseg_same';
-- Closes the transaction opened at the top of the script.
END TRANSACTION;

8
contrib/seg/segdata.h Normal file
View File

@ -0,0 +1,8 @@
/*
 * segdata.h
 *
 * In-memory representation of a seg value, as filled in by the parser in
 * segparse.y.
 */
#ifndef SEGDATA_H
#define SEGDATA_H

typedef struct SEG {
	float lower;		/* lower boundary of the interval */
	float upper;		/* upper boundary of the interval */
	char l_sigd;		/* number of significant digits of the lower boundary */
	char u_sigd;		/* number of significant digits of the upper boundary */
	char l_ext;			/* lower boundary marker: the '<', '>' or '~' given
						 * in the input, '-' when the boundary was left
						 * open, '\0' when there is none */
	char u_ext;			/* same, for the upper boundary */
} SEG;

#endif   /* SEGDATA_H */

183
contrib/seg/segparse.y Normal file
View File

@ -0,0 +1,183 @@
%{
/*
 * C prologue of the seg grammar: parser configuration, declarations of the
 * helpers used by the actions, and file-level scratch storage.
 */
#define YYERROR_VERBOSE
#define YYPARSE_PARAM result  /* need this to pass a pointer (void *) to yyparse */

#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>

#include "segdata.h"
#include "buffer.h"

#include "postgres.h"
#include "utils/elog.h"

#undef yylex                  /* failure to redefine yylex will result in calling the */
#define yylex seg_yylex       /* wrong scanner when running inside postgres backend  */

/*
 * errno comes from <errno.h> above.  Declaring it by hand as
 * "extern int errno" is wrong wherever errno is implemented as a macro.
 */
  extern int yylex();           /* defined as seg_yylex in segscan.c */
  extern int significant_digits( char *str );    /* defined in seg.c */

  int seg_yyerror( char *msg );
  int seg_yyparse( void *result );

  float seg_atof( char *value );

/*
 * Arguments are fully parenthesized, including the negated one in ABS, so
 * that ABS(a - b) negates the whole expression rather than only 'a'.
 */
#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
#define ABS(X) ((X) < 0 ? (-(X)) : (X))

  long threshold;
  /* scratch space for the "%g" renderings made in the 'range' rule */
  char strbuf[25] = {
    '0', '0', '0', '0', '0',
    '0', '0', '0', '0', '0',
    '0', '0', '0', '0', '0',
    '0', '0', '0', '0', '0',
    '0', '0', '0', '0', '\0'
  };

%}
/* BISON Declarations */
%union {
/* one parsed boundary: its value, its optional marker character and the
 * number of significant digits it was written with */
struct BND {
float val;
char ext;
char sigd;
} bnd;
/* token text as handed over by the scanner */
char * text;
}
/* Tokens carry text; both nonterminals below carry a BND. */
%token <text> FLOAT
%token <text> RANGE
%token <text> PLUMIN
%token <text> EXTENSION
%type <bnd> boundary
%type <bnd> deviation
%start range
/* Grammar follows */
%%
/*
 * range -- the start symbol.  Every alternative fills in the SEG structure
 * that the caller passed to seg_yyparse() through 'result'.
 *
 * An earlier revision had a stray ';' in the middle of the alternatives.
 * That ends the rule as far as portable yacc is concerned and is accepted
 * only by Bison; the rule is now terminated exactly once, at its end.
 */
range:
	  boundary PLUMIN deviation {
		/* 'a (+-) b' becomes the interval (a - b) .. (a + b) */
		SEG *seg = (SEG *) result;

		seg->lower = $1.val - $3.val;
		seg->upper = $1.val + $3.val;
		/*
		 * Digits to keep for each end: the digit count of its "%g"
		 * rendering capped at 6, but never fewer than either input had.
		 */
		sprintf(strbuf, "%g", seg->lower);
		seg->l_sigd = MAX(MIN(6, significant_digits(strbuf)), MAX($1.sigd, $3.sigd));
		sprintf(strbuf, "%g", seg->upper);
		seg->u_sigd = MAX(MIN(6, significant_digits(strbuf)), MAX($1.sigd, $3.sigd));
		seg->l_ext = '\0';
		seg->u_ext = '\0';
	  }
	|
	  boundary RANGE boundary {
		/* 'a .. b'; the boundaries must be given in ascending order */
		SEG *seg = (SEG *) result;

		seg->lower = $1.val;
		seg->upper = $3.val;
		if ( seg->lower > seg->upper ) {
			reset_parse_buffer();
			elog(ERROR, "swapped boundaries: %g is greater than %g", seg->lower, seg->upper );
			YYERROR;
		}
		seg->l_sigd = $1.sigd;
		seg->u_sigd = $3.sigd;
		seg->l_ext = $1.ext;
		seg->u_ext = $3.ext;
	  }
	|
	  boundary RANGE {
		/* 'a ..' -- no upper boundary; it is stored as HUGE and marked '-' */
		SEG *seg = (SEG *) result;

		seg->lower = $1.val;
		seg->upper = HUGE;
		seg->l_sigd = $1.sigd;
		seg->u_sigd = 0;
		seg->l_ext = $1.ext;
		seg->u_ext = '-';
	  }
	|
	  RANGE boundary {
		/* '.. b' -- no lower boundary; it is stored as -HUGE and marked '-' */
		SEG *seg = (SEG *) result;

		seg->lower = -HUGE;
		seg->upper = $2.val;
		seg->l_sigd = 0;
		seg->u_sigd = $2.sigd;
		seg->l_ext = '-';
		seg->u_ext = $2.ext;
	  }
	|
	  boundary {
		/* a single number: both ends of the interval are the same point */
		SEG *seg = (SEG *) result;

		seg->lower = seg->upper = $1.val;
		seg->l_sigd = seg->u_sigd = $1.sigd;
		seg->l_ext = seg->u_ext = $1.ext;
	  }
	;
/*
 * boundary -- one end of an interval: a number, optionally preceded by a
 * marker token whose first character is kept alongside the value.
 */
boundary:
	  FLOAT
		{
			$$.sigd = significant_digits($1);
			$$.val = seg_atof($1);
			$$.ext = '\0';
		}
	| EXTENSION FLOAT
		{
			$$.sigd = significant_digits($2);
			$$.val = seg_atof($2);
			$$.ext = *$1;
		}
	;
/*
 * deviation -- the number on the right-hand side of a PLUMIN token; it
 * never carries a marker.
 */
deviation:
	  FLOAT
		{
			$$.sigd = significant_digits($1);
			$$.val = seg_atof($1);
			$$.ext = '\0';
		}
	;
%%
float seg_atof ( char *value ) {
float result;
char *buf = (char *) palloc(256);
errno = 0;
sscanf(value, "%f", &result);
if ( errno ) {
sprintf(buf, "numeric value %s unrepresentable", value);
reset_parse_buffer();
elog(ERROR, buf);
}
return result;
}
int seg_yyerror ( char *msg ) {
char *buf = (char *) palloc(256);
int position;
yyclearin;
if ( !strcmp(msg, "parse error, expecting `$'") ) {
msg = "expecting end of input";
}
position = parse_buffer_pos() > parse_buffer_size() ? parse_buffer_pos() - 1 : parse_buffer_pos();
sprintf(
buf,
"%s at or near position %d, character ('%c', \\%03o), input: '%s'\n",
msg,
position,
parse_buffer()[position - 1],
parse_buffer()[position - 1],
parse_buffer()
);
reset_parse_buffer();
elog(ERROR, buf);
return 0;
}

53
contrib/seg/segscan.l Normal file
View File

@ -0,0 +1,53 @@
%{
/*
** A scanner for EMP-style numeric ranges
**
** The scanner does not read from a file: its input comes one byte at a
** time from the parse buffer maintained in buffer.c (see YY_INPUT below).
*/
#include <string.h>
#include <stdio.h>
/* token codes produced by the parser generator */
#include "segparse.h"
#include "buffer.h"
#define YY_NO_UNPUT 1
#undef yywrap
/* flex gets a couple of symbols wrong when used with the -P option; fix those */
#define YY_DECL int seg_yylex YY_PROTO(( void )); \
int seg_yylex YY_PROTO(( void ))
#define yylval seg_yylval
/* redefined YY_INPUT reads byte-wise from the memory area defined in buffer.c */
/* A '\0' byte from read_parse_buffer() is reported to flex as end of input. */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
{ \
int c = read_parse_buffer(); \
result = (c == '\0') ? YY_NULL : (buf[0] = c, 1); \
}
void seg_flush_scanner_buffer(void);
%}
/*
 * Named patterns used by the rules below:
 *   range    two or three dots
 *   plumin   '+-' in single quotes, or (+-) in parentheses
 *   integer  optionally signed run of digits
 *   real     optionally signed digits, a dot, digits (digits are required
 *            on both sides of the dot)
 *   float    an integer or a real with an optional exponent
 *
 * Rules, in order: RANGE, PLUMIN and FLOAT hand their matched text to the
 * parser through yylval.text; '<', '>' and '~' are returned as EXTENSION
 * with a constant string; blanks are skipped; any other character is
 * returned as itself.
 *
 * NOTE(review): for RANGE, PLUMIN and FLOAT, yylval.text points at yytext
 * rather than a copy -- confirm that the parser uses it before the next
 * token is scanned.
 */
range (\.\.)(\.)?
plumin (\'\+\-\')|(\(\+\-)\)
integer [+-]?[0-9]+
real [+-]?[0-9]+\.[0-9]+
float ({integer}|{real})([eE]{integer})?
%%
{range} yylval.text = yytext; return RANGE;
{plumin} yylval.text = yytext; return PLUMIN;
{float} yylval.text = yytext; return FLOAT;
\< yylval.text = "<"; return EXTENSION;
\> yylval.text = ">"; return EXTENSION;
\~ yylval.text = "~"; return EXTENSION;
[ ]+ /* discard spaces */
. return yytext[0]; /* alert parser of the garbage */
%%
int seg_yylex();

/*
 * seg_flush_scanner_buffer
 *
 * Discard whatever input the scanner currently has buffered.  It is called
 * from reset_parse_buffer() in buffer.c; the wrapper exists because
 * YY_FLUSH_BUFFER is a macro that is only available inside the generated
 * scanner.
 */
void
seg_flush_scanner_buffer(void)
{
	YY_FLUSH_BUFFER;
}

20
contrib/seg/sort-segments.pl Executable file
View File

@ -0,0 +1,20 @@
#!/usr/bin/perl

# Sort the rows of a tab-separated table by the segment value held in the
# last column.  Rows are read from the files named on the command line (or
# from standard input) and written to standard output.

# Return the sort key of a row: its last tab-separated field with the
# characters '~', '<', '>' and blanks removed.  Rows are compared
# numerically on this key.
sub seg_sort_key {
	my ($row) = @_;
	my @fields = split("\t", $row);
	my $key = pop @fields;
	$key =~ s/[~<> ]+//g;
	return $key;
}

chomp(my @rows = <>);

print "$_\n" for sort { seg_sort_key($a) <=> seg_sort_key($b) } @rows;

223
contrib/seg/sql/seg.sql Normal file
View File

@ -0,0 +1,223 @@
--
-- Test seg datatype
--
--
-- first, define the datatype. Turn off echoing so that expected file
-- does not depend on contents of seg.sql.
--
\set ECHO none
-- NOTE(review): the lines between "\set ECHO none" and "\set ECHO all" are
-- read while echoing is off, so they presumably never reach the expected
-- file (confirm).  Everything outside this window is echoed, comments
-- included, so any edit there means regenerating the expected file.  That is
-- why the review notes for the whole script are collected here.
--
-- NOTE(review): in the section headed '"contains"', only the first statement
-- uses the @ operator; the other six use ~, and the last three of them repeat
-- statements from the '"contained in"' section.  Presumably @ was intended
-- throughout -- confirm, and regenerate the expected file if it is changed.
--
-- NOTE(review): the '"contained in"' section runs
-- '0'::seg ~ '-1 .. 1'::seg twice in a row.
--
-- NOTE(review): "Test sorting" uses GROUP BY s with no ORDER BY, so the row
-- order presumably depends on how grouping is implemented -- confirm that
-- the order is guaranteed.
--
-- The \i below loads the datatype definition script.
\i seg.sql
\set ECHO all
--
-- testing the input and output functions
--
-- Any number
SELECT '1'::seg AS seg;
SELECT '-1'::seg AS seg;
SELECT '1.0'::seg AS seg;
SELECT '-1.0'::seg AS seg;
SELECT '1e7'::seg AS seg;
SELECT '-1e7'::seg AS seg;
SELECT '1.0e7'::seg AS seg;
SELECT '-1.0e7'::seg AS seg;
SELECT '1e+7'::seg AS seg;
SELECT '-1e+7'::seg AS seg;
SELECT '1.0e+7'::seg AS seg;
SELECT '-1.0e+7'::seg AS seg;
SELECT '1e-7'::seg AS seg;
SELECT '-1e-7'::seg AS seg;
SELECT '1.0e-7'::seg AS seg;
SELECT '-1.0e-7'::seg AS seg;
SELECT '2e-6'::seg AS seg;
SELECT '2e-5'::seg AS seg;
SELECT '2e-4'::seg AS seg;
SELECT '2e-3'::seg AS seg;
SELECT '2e-2'::seg AS seg;
SELECT '2e-1'::seg AS seg;
SELECT '2e-0'::seg AS seg;
SELECT '2e+0'::seg AS seg;
SELECT '2e+1'::seg AS seg;
SELECT '2e+2'::seg AS seg;
SELECT '2e+3'::seg AS seg;
SELECT '2e+4'::seg AS seg;
SELECT '2e+5'::seg AS seg;
SELECT '2e+6'::seg AS seg;
-- Significant digits preserved
SELECT '1'::seg AS seg;
SELECT '1.0'::seg AS seg;
SELECT '1.00'::seg AS seg;
SELECT '1.000'::seg AS seg;
SELECT '1.0000'::seg AS seg;
SELECT '1.00000'::seg AS seg;
SELECT '1.000000'::seg AS seg;
SELECT '0.000000120'::seg AS seg;
SELECT '3.400e5'::seg AS seg;
-- Digits truncated
SELECT '12.34567890123456'::seg AS seg;
-- Numbers with certainty indicators
SELECT '~6.5'::seg AS seg;
SELECT '<6.5'::seg AS seg;
SELECT '>6.5'::seg AS seg;
SELECT '~ 6.5'::seg AS seg;
SELECT '< 6.5'::seg AS seg;
SELECT '> 6.5'::seg AS seg;
-- Open intervals
SELECT '0..'::seg AS seg;
SELECT '0...'::seg AS seg;
SELECT '0 ..'::seg AS seg;
SELECT '0 ...'::seg AS seg;
SELECT '..0'::seg AS seg;
SELECT '...0'::seg AS seg;
SELECT '.. 0'::seg AS seg;
SELECT '... 0'::seg AS seg;
-- Finite intervals
SELECT '0 .. 1'::seg AS seg;
SELECT '-1 .. 0'::seg AS seg;
SELECT '-1 .. 1'::seg AS seg;
-- (+/-) intervals
SELECT '0(+-)1'::seg AS seg;
SELECT '0(+-)1.0'::seg AS seg;
SELECT '1.0(+-)0.005'::seg AS seg;
SELECT '101(+-)1'::seg AS seg;
-- incorrect number of significant digits in 99.0:
SELECT '100(+-)1'::seg AS seg;
-- invalid input
SELECT ''::seg AS seg;
SELECT 'ABC'::seg AS seg;
SELECT '1ABC'::seg AS seg;
SELECT '1.'::seg AS seg;
SELECT '1.....'::seg AS seg;
SELECT '.1'::seg AS seg;
SELECT '1..2.'::seg AS seg;
SELECT '1 e7'::seg AS seg;
SELECT '1e700'::seg AS seg;
--
-- testing the operators
--
-- equality/inequality:
--
SELECT '24 .. 33.20'::seg = '24 .. 33.20'::seg AS bool;
SELECT '24 .. 33.20'::seg = '24 .. 33.21'::seg AS bool;
SELECT '24 .. 33.20'::seg != '24 .. 33.20'::seg AS bool;
SELECT '24 .. 33.20'::seg != '24 .. 33.21'::seg AS bool;
-- overlap
--
SELECT '1'::seg && '1'::seg AS bool;
SELECT '1'::seg && '2'::seg AS bool;
SELECT '0 ..'::seg && '0 ..'::seg AS bool;
SELECT '0 .. 1'::seg && '0 .. 1'::seg AS bool;
SELECT '..0'::seg && '0..'::seg AS bool;
SELECT '-1 .. 0.1'::seg && '0 .. 1'::seg AS bool;
SELECT '-1 .. 0'::seg && '0 .. 1'::seg AS bool;
SELECT '-1 .. -0.0001'::seg && '0 .. 1'::seg AS bool;
SELECT '0 ..'::seg && '1'::seg AS bool;
SELECT '0 .. 1'::seg && '1'::seg AS bool;
SELECT '0 .. 1'::seg && '2'::seg AS bool;
SELECT '0 .. 2'::seg && '1'::seg AS bool;
SELECT '1'::seg && '0 .. 1'::seg AS bool;
SELECT '2'::seg && '0 .. 1'::seg AS bool;
SELECT '1'::seg && '0 .. 2'::seg AS bool;
-- overlap on the left
--
SELECT '1'::seg &< '0'::seg AS bool;
SELECT '1'::seg &< '1'::seg AS bool;
SELECT '1'::seg &< '2'::seg AS bool;
SELECT '0 .. 1'::seg &< '0'::seg AS bool;
SELECT '0 .. 1'::seg &< '1'::seg AS bool;
SELECT '0 .. 1'::seg &< '2'::seg AS bool;
SELECT '0 .. 1'::seg &< '0 .. 0.5'::seg AS bool;
SELECT '0 .. 1'::seg &< '0 .. 1'::seg AS bool;
SELECT '0 .. 1'::seg &< '0 .. 2'::seg AS bool;
SELECT '0 .. 1'::seg &< '1 .. 2'::seg AS bool;
SELECT '0 .. 1'::seg &< '2 .. 3'::seg AS bool;
-- overlap on the right
--
SELECT '0'::seg &> '1'::seg AS bool;
SELECT '1'::seg &> '1'::seg AS bool;
SELECT '2'::seg &> '1'::seg AS bool;
SELECT '0'::seg &> '0 .. 1'::seg AS bool;
SELECT '1'::seg &> '0 .. 1'::seg AS bool;
SELECT '2'::seg &> '0 .. 1'::seg AS bool;
SELECT '0 .. 0.5'::seg &> '0 .. 1'::seg AS bool;
SELECT '0 .. 1'::seg &> '0 .. 1'::seg AS bool;
SELECT '0 .. 2'::seg &> '0 .. 2'::seg AS bool;
SELECT '1 .. 2'::seg &> '0 .. 1'::seg AS bool;
SELECT '2 .. 3'::seg &> '0 .. 1'::seg AS bool;
-- left
--
SELECT '1'::seg << '0'::seg AS bool;
SELECT '1'::seg << '1'::seg AS bool;
SELECT '1'::seg << '2'::seg AS bool;
SELECT '0 .. 1'::seg << '0'::seg AS bool;
SELECT '0 .. 1'::seg << '1'::seg AS bool;
SELECT '0 .. 1'::seg << '2'::seg AS bool;
SELECT '0 .. 1'::seg << '0 .. 0.5'::seg AS bool;
SELECT '0 .. 1'::seg << '0 .. 1'::seg AS bool;
SELECT '0 .. 1'::seg << '0 .. 2'::seg AS bool;
SELECT '0 .. 1'::seg << '1 .. 2'::seg AS bool;
SELECT '0 .. 1'::seg << '2 .. 3'::seg AS bool;
-- right
--
SELECT '0'::seg >> '1'::seg AS bool;
SELECT '1'::seg >> '1'::seg AS bool;
SELECT '2'::seg >> '1'::seg AS bool;
SELECT '0'::seg >> '0 .. 1'::seg AS bool;
SELECT '1'::seg >> '0 .. 1'::seg AS bool;
SELECT '2'::seg >> '0 .. 1'::seg AS bool;
SELECT '0 .. 0.5'::seg >> '0 .. 1'::seg AS bool;
SELECT '0 .. 1'::seg >> '0 .. 1'::seg AS bool;
SELECT '0 .. 2'::seg >> '0 .. 2'::seg AS bool;
SELECT '1 .. 2'::seg >> '0 .. 1'::seg AS bool;
SELECT '2 .. 3'::seg >> '0 .. 1'::seg AS bool;
-- "contained in" (the left value belongs within the interval specified in the right value):
--
SELECT '0'::seg ~ '0'::seg AS bool;
SELECT '0'::seg ~ '0 ..'::seg AS bool;
SELECT '0'::seg ~ '.. 0'::seg AS bool;
SELECT '0'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '0'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '-1'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '1'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '-1 .. 1'::seg ~ '-1 .. 1'::seg AS bool;
-- "contains" (the left value contains the interval specified in the right value):
--
SELECT '0'::seg @ '0'::seg AS bool;
SELECT '0 .. '::seg ~ '0'::seg AS bool;
SELECT '.. 0'::seg ~ '0'::seg AS bool;
SELECT '-1 .. 1'::seg ~ '0'::seg AS bool;
SELECT '0'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '-1'::seg ~ '-1 .. 1'::seg AS bool;
SELECT '1'::seg ~ '-1 .. 1'::seg AS bool;
-- Load some example data and build the index
--
CREATE TABLE test_seg (s seg);
\copy test_seg from 'data/test_seg.data'
CREATE INDEX test_seg_ix ON test_seg USING gist (s);
SELECT count(*) FROM test_seg WHERE s @ '11..11.3';
-- Test sorting
SELECT * FROM test_seg WHERE s @ '11..11.3' GROUP BY s;