mirror of
https://git.postgresql.org/git/postgresql.git
synced 2024-09-10 08:19:26 +02:00
be6fcb671e
Make the examples self-contained to avoid confusion. Per bug report 8367 from KOIZUMI Satoru.
509 lines
19 KiB
Plaintext
509 lines
19 KiB
Plaintext
<!-- doc/src/sgml/rangetypes.sgml -->
|
|
|
|
<sect1 id="rangetypes">
|
|
<title>Range Types</title>
|
|
|
|
<indexterm>
|
|
<primary>range type</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Range types are data types representing a range of values of some
|
|
element type (called the range's <firstterm>subtype</>).
|
|
For instance, ranges
|
|
of <type>timestamp</type> might be used to represent the ranges of
|
|
time that a meeting room is reserved. In this case the data type
|
|
is <type>tsrange</type> (short for <quote>timestamp range</quote>),
|
|
and <type>timestamp</type> is the subtype. The subtype must have
|
|
a total order so that it is well-defined whether element values are
|
|
within, before, or after a range of values.
|
|
</para>
|
|
|
|
<para>
|
|
Range types are useful because they represent many element values in a
|
|
single range value, and because concepts such as overlapping ranges can
|
|
be expressed clearly. The use of time and date ranges for scheduling
|
|
purposes is the clearest example; but price ranges, measurement
|
|
ranges from an instrument, and so forth can also be useful.
|
|
</para>
|
|
|
|
<sect2 id="rangetypes-builtin">
|
|
<title>Built-in Range Types</title>
|
|
|
|
<para>
|
|
PostgreSQL comes with the following built-in range types:
|
|
<itemizedlist>
|
|
<listitem>
|
|
<para>
|
|
<type>int4range</type> — Range of <type>integer</type>
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
<type>int8range</type> — Range of <type>bigint</type>
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
<type>numrange</type> — Range of <type>numeric</type>
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
<type>tsrange</type> — Range of <type>timestamp without time zone</type>
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
<type>tstzrange</type> — Range of <type>timestamp with time zone</type>
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
<type>daterange</type> — Range of <type>date</type>
|
|
</para>
|
|
</listitem>
|
|
</itemizedlist>
|
|
In addition, you can define your own range types;
|
|
see <xref linkend="SQL-CREATETYPE"> for more information.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-examples">
|
|
<title>Examples</title>
|
|
|
|
<para>
|
|
<programlisting>
|
|
CREATE TABLE reservation (room int, during tsrange);
|
|
INSERT INTO reservation VALUES
|
|
(1108, '[2010-01-01 14:30, 2010-01-01 15:30)');
|
|
|
|
-- Containment
|
|
SELECT int4range(10, 20) @> 3;
|
|
|
|
-- Overlaps
|
|
SELECT numrange(11.1, 22.2) && numrange(20.0, 30.0);
|
|
|
|
-- Extract the upper bound
|
|
SELECT upper(int8range(15, 25));
|
|
|
|
-- Compute the intersection
|
|
SELECT int4range(10, 20) * int4range(15, 25);
|
|
|
|
-- Is the range empty?
|
|
SELECT isempty(numrange(1, 5));
|
|
</programlisting>
|
|
|
|
See <xref linkend="range-operators-table">
|
|
and <xref linkend="range-functions-table"> for complete lists of
|
|
operators and functions on range types.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-inclusivity">
|
|
<title>Inclusive and Exclusive Bounds</title>
|
|
|
|
<para>
|
|
Every non-empty range has two bounds, the lower bound and the upper
|
|
bound. All points between these values are included in the range. An
|
|
inclusive bound means that the boundary point itself is included in
|
|
the range as well, while an exclusive bound means that the boundary
|
|
point is not included in the range.
|
|
</para>
|
|
|
|
<para>
|
|
In the text form of a range, an inclusive lower bound is represented by
|
|
<quote><literal>[</literal></quote> while an exclusive lower bound is
|
|
represented by <quote><literal>(</literal></quote>. Likewise, an inclusive upper bound is represented by
|
|
<quote><literal>]</literal></quote>, while an exclusive upper bound is
|
|
represented by <quote><literal>)</literal></quote>.
|
|
(See <xref linkend="rangetypes-io"> for more details.)
|
|
</para>
|
|
|
|
<para>
|
|
The functions <literal>lower_inc</literal>
|
|
and <literal>upper_inc</literal> test the inclusivity of the lower
|
|
and upper bounds of a range value, respectively.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-infinite">
|
|
<title>Infinite (Unbounded) Ranges</title>
|
|
|
|
<para>
|
|
The lower bound of a range can be omitted, meaning that all points less
|
|
than the upper bound are included in the range. Likewise, if the upper
|
|
bound of the range is omitted, then all points greater than the lower bound
|
|
are included in the range. If both lower and upper bounds are omitted, all
|
|
values of the element type are considered to be in the range.
|
|
</para>
|
|
|
|
<para>
|
|
This is equivalent to considering that the lower bound is <quote>minus
|
|
infinity</quote>, or the upper bound is <quote>plus infinity</quote>,
|
|
respectively. But note that these infinite values are never values of
|
|
the range's element type, and can never be part of the range. (So there
|
|
is no such thing as an inclusive infinite bound — if you try to
|
|
write one, it will automatically be converted to an exclusive bound.)
|
|
</para>
|
|
|
|
<para>
|
|
Also, some element types have a notion of <quote>infinity</>, but that
|
|
is just another value so far as the range type mechanisms are concerned.
|
|
For example, in timestamp ranges, <literal>[today,]</> means the same
|
|
thing as <literal>[today,)</>. But <literal>[today,infinity]</> means
|
|
something different from <literal>[today,infinity)</> — the latter
|
|
excludes the special <type>timestamp</> value <literal>infinity</>.
|
|
</para>
|
|
|
|
<para>
|
|
The functions <literal>lower_inf</literal>
|
|
and <literal>upper_inf</literal> test for infinite lower
|
|
and upper bounds of a range, respectively.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-io">
|
|
<title>Range Input/Output</title>
|
|
|
|
<para>
|
|
The input for a range value must follow one of the following patterns:
|
|
<synopsis>
|
|
(<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>)
|
|
(<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>]
|
|
[<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>)
|
|
[<replaceable>lower-bound</replaceable>,<replaceable>upper-bound</replaceable>]
|
|
empty
|
|
</synopsis>
|
|
The parentheses or brackets indicate whether the lower and upper bounds
|
|
are exclusive or inclusive, as described previously.
|
|
Notice that the final pattern is <literal>empty</literal>, which
|
|
represents an empty range (a range that contains no points).
|
|
</para>
|
|
|
|
<para>
|
|
The <replaceable>lower-bound</replaceable> may be either a string
|
|
that is valid input for the subtype, or empty to indicate no
|
|
lower bound. Likewise, <replaceable>upper-bound</replaceable> may be
|
|
either a string that is valid input for the subtype, or empty to
|
|
indicate no upper bound.
|
|
</para>
|
|
|
|
<para>
|
|
Each bound value can be quoted using <literal>"</literal> (double quote)
|
|
characters. This is necessary if the bound value contains parentheses,
|
|
brackets, commas, double quotes, or backslashes, since these characters
|
|
would otherwise be taken as part of the range syntax. To put a double
|
|
quote or backslash in a quoted bound value, precede it with a
|
|
backslash. (Also, a pair of double quotes within a double-quoted bound
|
|
value is taken to represent a double quote character, analogously to the
|
|
rules for single quotes in SQL literal strings.) Alternatively, you can
|
|
avoid quoting and use backslash-escaping to protect all data characters
|
|
that would otherwise be taken as range syntax. Also, to write a bound
|
|
value that is an empty string, write <literal>""</literal>, since writing
|
|
nothing means an infinite bound.
|
|
</para>
|
|
|
|
<para>
|
|
Whitespace is allowed before and after the range value, but any whitespace
|
|
between the parentheses or brackets is taken as part of the lower or upper
|
|
bound value. (Depending on the element type, it might or might not be
|
|
significant.)
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
These rules are very similar to those for writing field values in
|
|
composite-type literals. See <xref linkend="rowtypes-io-syntax"> for
|
|
additional commentary.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
Examples:
|
|
<programlisting>
|
|
-- includes 3, does not include 7, and does include all points in between
|
|
SELECT '[3,7)'::int4range;
|
|
|
|
-- does not include either 3 or 7, but includes all points in between
|
|
SELECT '(3,7)'::int4range;
|
|
|
|
-- includes only the single point 4
|
|
SELECT '[4,4]'::int4range;
|
|
|
|
-- includes no points (and will be normalized to 'empty')
|
|
SELECT '[4,4)'::int4range;
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-construct">
|
|
<title>Constructing Ranges</title>
|
|
|
|
<para>
|
|
Each range type has a constructor function with the same name as the range
|
|
type. Using the constructor function is frequently more convenient than
|
|
writing a range literal constant, since it avoids the need for extra
|
|
quoting of the bound values. The constructor function
|
|
accepts two or three arguments. The two-argument form constructs a range
|
|
in standard form (lower bound inclusive, upper bound exclusive), while
|
|
the three-argument form constructs a range with bounds of the form
|
|
specified by the third argument.
|
|
The third argument must be one of the strings
|
|
<quote><literal>()</literal></quote>,
|
|
<quote><literal>(]</literal></quote>,
|
|
<quote><literal>[)</literal></quote>, or
|
|
<quote><literal>[]</literal></quote>.
|
|
For example:
|
|
|
|
<programlisting>
|
|
-- The full form is: lower bound, upper bound, and text argument indicating
|
|
-- inclusivity/exclusivity of bounds.
|
|
SELECT numrange(1.0, 14.0, '(]');
|
|
|
|
-- If the third argument is omitted, '[)' is assumed.
|
|
SELECT numrange(1.0, 14.0);
|
|
|
|
-- Although '(]' is specified here, on display the value will be converted to
|
|
-- canonical form, since int8range is a discrete range type (see below).
|
|
SELECT int8range(1, 14, '(]');
|
|
|
|
-- Using NULL for either bound causes the range to be unbounded on that side.
|
|
SELECT numrange(NULL, 2.2);
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-discrete">
|
|
<title>Discrete Range Types</title>
|
|
|
|
<para>
|
|
A discrete range is one whose element type has a well-defined
|
|
<quote>step</quote>, such as <type>integer</type> or <type>date</type>.
|
|
In these types two elements can be said to be adjacent, when there are
|
|
no valid values between them. This contrasts with continuous ranges,
|
|
where it's always (or almost always) possible to identify other element
|
|
values between two given values. For example, a range over the
|
|
<type>numeric</> type is continuous, as is a range over <type>timestamp</>.
|
|
(Even though <type>timestamp</> has limited precision, and so could
|
|
theoretically be treated as discrete, it's better to consider it continuous
|
|
since the step size is normally not of interest.)
|
|
</para>
|
|
|
|
<para>
|
|
Another way to think about a discrete range type is that there is a clear
|
|
idea of a <quote>next</> or <quote>previous</> value for each element value.
|
|
Knowing that, it is possible to convert between inclusive and exclusive
|
|
representations of a range's bounds, by choosing the next or previous
|
|
element value instead of the one originally given.
|
|
For example, in an integer range type <literal>[4,8]</> and
|
|
<literal>(3,9)</> denote the same set of values; but this would not be so
|
|
for a range over numeric.
|
|
</para>
|
|
|
|
<para>
|
|
A discrete range type should have a <firstterm>canonicalization</>
|
|
function that is aware of the desired step size for the element type.
|
|
The canonicalization function is charged with converting equivalent values
|
|
of the range type to have identical representations, in particular
|
|
consistently inclusive or exclusive bounds.
|
|
If a canonicalization function is not specified, then ranges with different
|
|
formatting will always be treated as unequal, even though they might
|
|
represent the same set of values in reality.
|
|
</para>
|
|
|
|
<para>
|
|
The built-in range types <type>int4range</type>, <type>int8range</type>,
|
|
and <type>daterange</type> all use a canonical form that includes
|
|
the lower bound and excludes the upper bound; that is,
|
|
<literal>[)</literal>. User-defined range types can use other conventions,
|
|
however.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-defining">
|
|
<title>Defining New Range Types</title>
|
|
|
|
<para>
|
|
Users can define their own range types. The most common reason to do
|
|
this is to use ranges over subtypes not provided among the built-in
|
|
range types.
|
|
For example, to define a new range type of subtype <type>float8</type>:
|
|
|
|
<programlisting>
|
|
CREATE TYPE floatrange AS RANGE (
|
|
subtype = float8,
|
|
subtype_diff = float8mi
|
|
);
|
|
|
|
SELECT '[1.234, 5.678]'::floatrange;
|
|
</programlisting>
|
|
|
|
Because <type>float8</type> has no meaningful
|
|
<quote>step</quote>, we do not define a canonicalization
|
|
function in this example.
|
|
</para>
|
|
|
|
<para>
|
|
If the subtype is considered to have discrete rather than continuous
|
|
values, the <command>CREATE TYPE</> command should specify a
|
|
<literal>canonical</> function.
|
|
The canonicalization function takes an input range value, and must return
|
|
an equivalent range value that may have different bounds and formatting.
|
|
The canonical output for two ranges that represent the same set of values,
|
|
for example the integer ranges <literal>[1, 7]</literal> and <literal>[1,
|
|
8)</literal>, must be identical. It doesn't matter which representation
|
|
you choose to be the canonical one, so long as two equivalent values with
|
|
different formattings are always mapped to the same value with the same
|
|
formatting. In addition to adjusting the inclusive/exclusive bounds
|
|
format, a canonicalization function might round off boundary values, in
|
|
case the desired step size is larger than what the subtype is capable of
|
|
storing. For instance, a range type over <type>timestamp</> could be
|
|
defined to have a step size of an hour, in which case the canonicalization
|
|
function would need to round off bounds that weren't a multiple of an hour,
|
|
or perhaps throw an error instead.
|
|
</para>
|
|
|
|
<para>
|
|
Defining your own range type also allows you to specify a different
|
|
subtype B-tree operator class or collation to use, so as to change the sort
|
|
ordering that determines which values fall into a given range.
|
|
</para>
|
|
|
|
<para>
|
|
In addition, any range type that is meant to be used with GiST or SP-GiST indexes
|
|
should define a subtype difference, or <literal>subtype_diff</>, function.
|
|
(the index will still work without <literal>subtype_diff</>, but it is
|
|
likely to be considerably less efficient than if a difference function is
|
|
provided.) The subtype difference function takes two input values of the
|
|
subtype, and returns their difference (i.e., <replaceable>X</> minus
|
|
<replaceable>Y</>) represented as a <type>float8</> value. In our example
|
|
above, the function that underlies the regular <type>float8</> minus
|
|
operator can be used; but for any other subtype, some type conversion would
|
|
be necessary. Some creative thought about how to represent differences as
|
|
numbers might be needed, too. To the greatest extent possible, the
|
|
<literal>subtype_diff</> function should agree with the sort ordering
|
|
implied by the selected operator class and collation; that is, its result
|
|
should be positive whenever its first argument is greater than its second
|
|
according to the sort ordering.
|
|
</para>
|
|
|
|
<para>
|
|
See <xref linkend="SQL-CREATETYPE"> for more information about creating
|
|
range types.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-indexing">
|
|
<title>Indexing</title>
|
|
|
|
<indexterm>
|
|
<primary>range type</primary>
|
|
<secondary>indexes on</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
GiST and SP-GiST indexes can be created for table columns of range types.
|
|
For instance, to create a GiST index:
|
|
<programlisting>
|
|
CREATE INDEX reservation_idx ON reservation USING gist (during);
|
|
</programlisting>
|
|
A GiST or SP-GiST index can accelerate queries involving these range operators:
|
|
<literal>=</>,
|
|
<literal>&&</>,
|
|
<literal><@</>,
|
|
<literal>@></>,
|
|
<literal><<</>,
|
|
<literal>>></>,
|
|
<literal>-|-</>,
|
|
<literal>&<</>, and
|
|
<literal>&></>
|
|
(see <xref linkend="range-operators-table"> for more information).
|
|
</para>
|
|
|
|
<para>
|
|
In addition, B-tree and hash indexes can be created for table columns of
|
|
range types. For these index types, basically the only useful range
|
|
operation is equality. There is a B-tree sort ordering defined for range
|
|
values, with corresponding <literal><</> and <literal>></> operators,
|
|
but the ordering is rather arbitrary and not usually useful in the real
|
|
world. Range types' B-tree and hash support is primarily meant to
|
|
allow sorting and hashing internally in queries, rather than creation of
|
|
actual indexes.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="rangetypes-constraint">
|
|
<title>Constraints on Ranges</title>
|
|
|
|
<indexterm>
|
|
<primary>range type</primary>
|
|
<secondary>exclude</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
While <literal>UNIQUE</literal> is a natural constraint for scalar
|
|
values, it is usually unsuitable for range types. Instead, an
|
|
exclusion constraint is often more appropriate
|
|
(see <link linkend="SQL-CREATETABLE-EXCLUDE">CREATE TABLE
|
|
... CONSTRAINT ... EXCLUDE</link>). Exclusion constraints allow the
|
|
specification of constraints such as <quote>non-overlapping</quote> on a
|
|
range type. For example:
|
|
|
|
<programlisting>
|
|
CREATE TABLE reservation (
|
|
during tsrange,
|
|
EXCLUDE USING gist (during WITH &&)
|
|
);
|
|
</programlisting>
|
|
|
|
That constraint will prevent any overlapping values from existing
|
|
in the table at the same time:
|
|
|
|
<programlisting>
|
|
INSERT INTO reservation VALUES
|
|
('[2010-01-01 11:30, 2010-01-01 15:00)');
|
|
INSERT 0 1
|
|
|
|
INSERT INTO reservation VALUES
|
|
('[2010-01-01 14:45, 2010-01-01 15:45)');
|
|
ERROR: conflicting key value violates exclusion constraint "reservation_during_excl"
|
|
DETAIL: Key (during)=(["2010-01-01 14:45:00","2010-01-01 15:45:00")) conflicts
|
|
with existing key (during)=(["2010-01-01 11:30:00","2010-01-01 15:00:00")).
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
You can use the <link linkend="btree-gist"><literal>btree_gist</></link>
|
|
extension to define exclusion constraints on plain scalar data types, which
|
|
can then be combined with range exclusions for maximum flexibility. For
|
|
example, after <literal>btree_gist</literal> is installed, the following
|
|
constraint will reject overlapping ranges only if the meeting room numbers
|
|
are equal:
|
|
|
|
<programlisting>
|
|
CREATE EXTENSION btree_gist;
|
|
CREATE TABLE room_reservation (
|
|
room text,
|
|
during tsrange,
|
|
EXCLUDE USING gist (room WITH =, during WITH &&)
|
|
);
|
|
|
|
INSERT INTO room_reservation VALUES
|
|
('123A', '[2010-01-01 14:00, 2010-01-01 15:00)');
|
|
INSERT 0 1
|
|
|
|
INSERT INTO room_reservation VALUES
|
|
('123A', '[2010-01-01 14:30, 2010-01-01 15:30)');
|
|
ERROR: conflicting key value violates exclusion constraint "room_reservation_room_during_excl"
|
|
DETAIL: Key (room, during)=(123A, ["2010-01-01 14:30:00","2010-01-01 15:30:00")) conflicts
|
|
with existing key (room, during)=(123A, ["2010-01-01 14:00:00","2010-01-01 15:00:00")).
|
|
|
|
INSERT INTO room_reservation VALUES
|
|
('123B', '[2010-01-01 14:30, 2010-01-01 15:30)');
|
|
INSERT 0 1
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
</sect1>
|