5394 lines
190 KiB
Plaintext
5394 lines
190 KiB
Plaintext
<!-- doc/src/sgml/datatype.sgml -->
|
|
|
|
<chapter id="datatype">
|
|
<title>Data Types</title>
|
|
|
|
<indexterm zone="datatype">
|
|
<primary>data type</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>type</primary>
|
|
<see>data type</see>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> has a rich set of native data
|
|
types available to users. Users can add new types to
|
|
<productname>PostgreSQL</productname> using the <xref
|
|
linkend="sql-createtype"/> command.
|
|
</para>
|
|
|
|
<para>
|
|
<xref linkend="datatype-table"/> shows all the built-in general-purpose data
|
|
types. Most of the alternative names listed in the
|
|
<quote>Aliases</quote> column are the names used internally by
|
|
<productname>PostgreSQL</productname> for historical reasons. In
|
|
addition, some internally used or deprecated types are available,
|
|
but are not listed here.
|
|
</para>
|
|
|
|
<table id="datatype-table">
|
|
<title>Data Types</title>
|
|
<tgroup cols="3">
|
|
<colspec colname="col1" colwidth="2*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Aliases</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><type>bigint</type></entry>
|
|
<entry><type>int8</type></entry>
|
|
<entry>signed eight-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>bigserial</type></entry>
|
|
<entry><type>serial8</type></entry>
|
|
<entry>autoincrementing eight-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>bit [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry></entry>
|
|
<entry>fixed-length bit string</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>bit varying [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry><type>varbit [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry>variable-length bit string</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>boolean</type></entry>
|
|
<entry><type>bool</type></entry>
|
|
<entry>logical Boolean (true/false)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>box</type></entry>
|
|
<entry></entry>
|
|
<entry>rectangular box on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>bytea</type></entry>
|
|
<entry></entry>
|
|
<entry>binary data (<quote>byte array</quote>)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>character [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry><type>char [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry>fixed-length character string</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>character varying [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry><type>varchar [ (<replaceable>n</replaceable>) ]</type></entry>
|
|
<entry>variable-length character string</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>cidr</type></entry>
|
|
<entry></entry>
|
|
<entry>IPv4 or IPv6 network address</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>circle</type></entry>
|
|
<entry></entry>
|
|
<entry>circle on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>date</type></entry>
|
|
<entry></entry>
|
|
<entry>calendar date (year, month, day)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>double precision</type></entry>
|
|
<entry><type>float8</type></entry>
|
|
<entry>double precision floating-point number (8 bytes)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>inet</type></entry>
|
|
<entry></entry>
|
|
<entry>IPv4 or IPv6 host address</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>integer</type></entry>
|
|
<entry><type>int</type>, <type>int4</type></entry>
|
|
<entry>signed four-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>interval [ <replaceable>fields</replaceable> ] [ (<replaceable>p</replaceable>) ]</type></entry>
|
|
<entry></entry>
|
|
<entry>time span</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>json</type></entry>
|
|
<entry></entry>
|
|
<entry>textual JSON data</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>jsonb</type></entry>
|
|
<entry></entry>
|
|
<entry>binary JSON data, decomposed</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>line</type></entry>
|
|
<entry></entry>
|
|
<entry>infinite line on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>lseg</type></entry>
|
|
<entry></entry>
|
|
<entry>line segment on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>macaddr</type></entry>
|
|
<entry></entry>
|
|
<entry>MAC (Media Access Control) address</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>macaddr8</type></entry>
|
|
<entry></entry>
|
|
<entry>MAC (Media Access Control) address (EUI-64 format)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>money</type></entry>
|
|
<entry></entry>
|
|
<entry>currency amount</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>numeric [ (<replaceable>p</replaceable>,
|
|
<replaceable>s</replaceable>) ]</type></entry>
|
|
<entry><type>decimal [ (<replaceable>p</replaceable>,
|
|
<replaceable>s</replaceable>) ]</type></entry>
|
|
<entry>exact numeric of selectable precision</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>path</type></entry>
|
|
<entry></entry>
|
|
<entry>geometric path on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>pg_lsn</type></entry>
|
|
<entry></entry>
|
|
<entry><productname>PostgreSQL</productname> Log Sequence Number</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>pg_snapshot</type></entry>
|
|
<entry></entry>
|
|
<entry>user-level transaction ID snapshot</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>point</type></entry>
|
|
<entry></entry>
|
|
<entry>geometric point on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>polygon</type></entry>
|
|
<entry></entry>
|
|
<entry>closed geometric path on a plane</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>real</type></entry>
|
|
<entry><type>float4</type></entry>
|
|
<entry>single precision floating-point number (4 bytes)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>smallint</type></entry>
|
|
<entry><type>int2</type></entry>
|
|
<entry>signed two-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>smallserial</type></entry>
|
|
<entry><type>serial2</type></entry>
|
|
<entry>autoincrementing two-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>serial</type></entry>
|
|
<entry><type>serial4</type></entry>
|
|
<entry>autoincrementing four-byte integer</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>text</type></entry>
|
|
<entry></entry>
|
|
<entry>variable-length character string</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>time [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
|
|
<entry></entry>
|
|
<entry>time of day (no time zone)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>time [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
|
|
<entry><type>timetz</type></entry>
|
|
<entry>time of day, including time zone</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>timestamp [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
|
|
<entry></entry>
|
|
<entry>date and time (no time zone)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>timestamp [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
|
|
<entry><type>timestamptz</type></entry>
|
|
<entry>date and time, including time zone</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>tsquery</type></entry>
|
|
<entry></entry>
|
|
<entry>text search query</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>tsvector</type></entry>
|
|
<entry></entry>
|
|
<entry>text search document</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>txid_snapshot</type></entry>
|
|
<entry></entry>
|
|
<entry>user-level transaction ID snapshot (deprecated; see <type>pg_snapshot</type>)</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>uuid</type></entry>
|
|
<entry></entry>
|
|
<entry>universally unique identifier</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>xml</type></entry>
|
|
<entry></entry>
|
|
<entry>XML data</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<note>
|
|
<title>Compatibility</title>
|
|
<para>
|
|
The following types (or spellings thereof) are specified by
|
|
<acronym>SQL</acronym>: <type>bigint</type>, <type>bit</type>, <type>bit
|
|
varying</type>, <type>boolean</type>, <type>char</type>,
|
|
<type>character varying</type>, <type>character</type>,
|
|
<type>varchar</type>, <type>date</type>, <type>double
|
|
precision</type>, <type>integer</type>, <type>interval</type>,
|
|
<type>numeric</type>, <type>decimal</type>, <type>real</type>,
|
|
<type>smallint</type>, <type>time</type> (with or without time zone),
|
|
<type>timestamp</type> (with or without time zone),
|
|
<type>xml</type>.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
Each data type has an external representation determined by its input
|
|
and output functions. Many of the built-in types have
|
|
obvious external formats. However, several types are either unique
|
|
to <productname>PostgreSQL</productname>, such as geometric
|
|
paths, or have several possible formats, such as the date
|
|
and time types.
|
|
Some of the input and output functions are not invertible, i.e.,
|
|
the result of an output function might lose accuracy when compared to
|
|
the original input.
|
|
</para>
|
|
|
|
<sect1 id="datatype-numeric">
|
|
<title>Numeric Types</title>
|
|
|
|
<indexterm zone="datatype-numeric">
|
|
<primary>data type</primary>
|
|
<secondary>numeric</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Numeric types consist of two-, four-, and eight-byte integers,
|
|
four- and eight-byte floating-point numbers, and selectable-precision
|
|
decimals. <xref linkend="datatype-numeric-table"/> lists the
|
|
available types.
|
|
</para>
|
|
|
|
<table id="datatype-numeric-table">
|
|
<title>Numeric Types</title>
|
|
<tgroup cols="4">
|
|
<colspec colname="col1" colwidth="2*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<colspec colname="col4" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
<entry>Range</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><type>smallint</type></entry>
|
|
<entry>2 bytes</entry>
|
|
<entry>small-range integer</entry>
|
|
<entry>-32768 to +32767</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>integer</type></entry>
|
|
<entry>4 bytes</entry>
|
|
<entry>typical choice for integer</entry>
|
|
<entry>-2147483648 to +2147483647</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>bigint</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>large-range integer</entry>
|
|
<entry>-9223372036854775808 to +9223372036854775807</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>decimal</type></entry>
|
|
<entry>variable</entry>
|
|
<entry>user-specified precision, exact</entry>
|
|
<entry>up to 131072 digits before the decimal point; up to 16383 digits after the decimal point</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>numeric</type></entry>
|
|
<entry>variable</entry>
|
|
<entry>user-specified precision, exact</entry>
|
|
<entry>up to 131072 digits before the decimal point; up to 16383 digits after the decimal point</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>real</type></entry>
|
|
<entry>4 bytes</entry>
|
|
<entry>variable-precision, inexact</entry>
|
|
<entry>6 decimal digits precision</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>double precision</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>variable-precision, inexact</entry>
|
|
<entry>15 decimal digits precision</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>smallserial</type></entry>
|
|
<entry>2 bytes</entry>
|
|
<entry>small autoincrementing integer</entry>
|
|
<entry>1 to 32767</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>serial</type></entry>
|
|
<entry>4 bytes</entry>
|
|
<entry>autoincrementing integer</entry>
|
|
<entry>1 to 2147483647</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>bigserial</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>large autoincrementing integer</entry>
|
|
<entry>1 to 9223372036854775807</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
The syntax of constants for the numeric types is described in
|
|
<xref linkend="sql-syntax-constants"/>. The numeric types have a
|
|
full set of corresponding arithmetic operators and
|
|
functions. Refer to <xref linkend="functions"/> for more
|
|
information. The following sections describe the types in detail.
|
|
</para>
|
|
|
|
<sect2 id="datatype-int">
|
|
<title>Integer Types</title>
|
|
|
|
<indexterm zone="datatype-int">
|
|
<primary>integer</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-int">
|
|
<primary>smallint</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-int">
|
|
<primary>bigint</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>int4</primary>
|
|
<see>integer</see>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>int2</primary>
|
|
<see>smallint</see>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>int8</primary>
|
|
<see>bigint</see>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The types <type>smallint</type>, <type>integer</type>, and
|
|
<type>bigint</type> store whole numbers, that is, numbers without
|
|
fractional components, of various ranges. Attempts to store
|
|
values outside of the allowed range will result in an error.
|
|
</para>
|
|
|
|
<para>
|
|
The type <type>integer</type> is the common choice, as it offers
|
|
the best balance between range, storage size, and performance.
|
|
The <type>smallint</type> type is generally only used if disk
|
|
space is at a premium. The <type>bigint</type> type is designed to be
|
|
used when the range of the <type>integer</type> type is insufficient.
|
|
</para>
|
|
|
|
<para>
|
|
<acronym>SQL</acronym> only specifies the integer types
|
|
<type>integer</type> (or <type>int</type>),
|
|
<type>smallint</type>, and <type>bigint</type>. The
|
|
type names <type>int2</type>, <type>int4</type>, and
|
|
<type>int8</type> are extensions, which are also used by some
|
|
other <acronym>SQL</acronym> database systems.
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-numeric-decimal">
|
|
<title>Arbitrary Precision Numbers</title>
|
|
|
|
<indexterm>
|
|
<primary>numeric (data type)</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>arbitrary precision numbers</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>decimal</primary>
|
|
<see>numeric</see>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The type <type>numeric</type> can store numbers with a
|
|
very large number of digits. It is especially recommended for
|
|
storing monetary amounts and other quantities where exactness is
|
|
required. Calculations with <type>numeric</type> values yield exact
|
|
results where possible, e.g., addition, subtraction, multiplication.
|
|
However, calculations on <type>numeric</type> values are very slow
|
|
compared to the integer types, or to the floating-point types
|
|
described in the next section.
|
|
</para>
|
|
|
|
<para>
|
|
We use the following terms below: The
|
|
<firstterm>precision</firstterm> of a <type>numeric</type>
|
|
is the total count of significant digits in the whole number,
|
|
that is, the number of digits to both sides of the decimal point.
|
|
The <firstterm>scale</firstterm> of a <type>numeric</type> is the
|
|
count of decimal digits in the fractional part, to the right of the
|
|
decimal point. So the number 23.5141 has a precision of 6 and a
|
|
scale of 4. Integers can be considered to have a scale of zero.
|
|
</para>
|
|
|
|
<para>
|
|
Both the maximum precision and the maximum scale of a
|
|
<type>numeric</type> column can be
|
|
configured. To declare a column of type <type>numeric</type> use
|
|
the syntax:
|
|
<programlisting>
|
|
NUMERIC(<replaceable>precision</replaceable>, <replaceable>scale</replaceable>)
|
|
</programlisting>
|
|
The precision must be positive, while the scale may be positive or
|
|
negative (see below). Alternatively:
|
|
<programlisting>
|
|
NUMERIC(<replaceable>precision</replaceable>)
|
|
</programlisting>
|
|
selects a scale of 0. Specifying:
|
|
<programlisting>
|
|
NUMERIC
|
|
</programlisting>
|
|
without any precision or scale creates an <quote>unconstrained
|
|
numeric</quote> column in which numeric values of any length can be
|
|
stored, up to the implementation limits. A column of this kind will
|
|
not coerce input values to any particular scale, whereas
|
|
<type>numeric</type> columns with a declared scale will coerce
|
|
input values to that scale. (The <acronym>SQL</acronym> standard
|
|
requires a default scale of 0, i.e., coercion to integer
|
|
precision. We find this a bit useless. If you're concerned
|
|
about portability, always specify the precision and scale
|
|
explicitly.)
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
The maximum precision that can be explicitly specified in
|
|
a <type>numeric</type> type declaration is 1000. An
|
|
unconstrained <type>numeric</type> column is subject to the limits
|
|
described in <xref linkend="datatype-numeric-table"/>.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
If the scale of a value to be stored is greater than the declared
|
|
scale of the column, the system will round the value to the specified
|
|
number of fractional digits. Then, if the number of digits to the
|
|
left of the decimal point exceeds the declared precision minus the
|
|
declared scale, an error is raised.
|
|
For example, a column declared as
|
|
<programlisting>
|
|
NUMERIC(3, 1)
|
|
</programlisting>
|
|
will round values to 1 decimal place and can store values between
|
|
-99.9 and 99.9, inclusive.
|
|
</para>
|
|
|
|
<para>
|
|
Beginning in <productname>PostgreSQL</productname> 15, it is allowed
|
|
to declare a <type>numeric</type> column with a negative scale. Then
|
|
values will be rounded to the left of the decimal point. The
|
|
precision still represents the maximum number of non-rounded digits.
|
|
Thus, a column declared as
|
|
<programlisting>
|
|
NUMERIC(2, -3)
|
|
</programlisting>
|
|
will round values to the nearest thousand and can store values
|
|
between -99000 and 99000, inclusive.
|
|
It is also allowed to declare a scale larger than the declared
|
|
precision. Such a column can only hold fractional values, and it
|
|
requires the number of zero digits just to the right of the decimal
|
|
point to be at least the declared scale minus the declared precision.
|
|
For example, a column declared as
|
|
<programlisting>
|
|
NUMERIC(3, 5)
|
|
</programlisting>
|
|
will round values to 5 decimal places and can store values between
|
|
-0.00999 and 0.00999, inclusive.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
<productname>PostgreSQL</productname> permits the scale in a
|
|
<type>numeric</type> type declaration to be any value in the range
|
|
-1000 to 1000. However, the <acronym>SQL</acronym> standard requires
|
|
the scale to be in the range 0 to <replaceable>precision</replaceable>.
|
|
Using scales outside that range may not be portable to other database
|
|
systems.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
Numeric values are physically stored without any extra leading or
|
|
trailing zeroes. Thus, the declared precision and scale of a column
|
|
are maximums, not fixed allocations. (In this sense the <type>numeric</type>
|
|
type is more akin to <type>varchar(<replaceable>n</replaceable>)</type>
|
|
than to <type>char(<replaceable>n</replaceable>)</type>.) The actual storage
|
|
requirement is two bytes for each group of four decimal digits,
|
|
plus three to eight bytes overhead.
|
|
</para>
|
|
|
|
<indexterm>
|
|
<primary>infinity</primary>
|
|
<secondary>numeric (data type)</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>NaN</primary>
|
|
<see>not a number</see>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>not a number</primary>
|
|
<secondary>numeric (data type)</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
In addition to ordinary numeric values, the <type>numeric</type> type
|
|
has several special values:
|
|
<literallayout>
|
|
<literal>Infinity</literal>
|
|
<literal>-Infinity</literal>
|
|
<literal>NaN</literal>
|
|
</literallayout>
|
|
These are adapted from the IEEE 754 standard, and represent
|
|
<quote>infinity</quote>, <quote>negative infinity</quote>, and
|
|
<quote>not-a-number</quote>, respectively. When writing these values
|
|
as constants in an SQL command, you must put quotes around them,
|
|
for example <literal>UPDATE table SET x = '-Infinity'</literal>.
|
|
On input, these strings are recognized in a case-insensitive manner.
|
|
The infinity values can alternatively be spelled <literal>inf</literal>
|
|
and <literal>-inf</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
The infinity values behave as per mathematical expectations. For
|
|
example, <literal>Infinity</literal> plus any finite value equals
|
|
<literal>Infinity</literal>, as does <literal>Infinity</literal>
|
|
plus <literal>Infinity</literal>; but <literal>Infinity</literal>
|
|
minus <literal>Infinity</literal> yields <literal>NaN</literal> (not a
|
|
number), because it has no well-defined interpretation. Note that an
|
|
infinity can only be stored in an unconstrained <type>numeric</type>
|
|
column, because it notionally exceeds any finite precision limit.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>NaN</literal> (not a number) value is used to represent
|
|
undefined calculational results. In general, any operation with
|
|
a <literal>NaN</literal> input yields another <literal>NaN</literal>.
|
|
The only exception is when the operation's other inputs are such that
|
|
the same output would be obtained if the <literal>NaN</literal> were to
|
|
be replaced by any finite or infinite numeric value; then, that output
|
|
value is used for <literal>NaN</literal> too. (An example of this
|
|
principle is that <literal>NaN</literal> raised to the zero power
|
|
yields one.)
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
In most implementations of the <quote>not-a-number</quote> concept,
|
|
<literal>NaN</literal> is not considered equal to any other numeric
|
|
value (including <literal>NaN</literal>). In order to allow
|
|
<type>numeric</type> values to be sorted and used in tree-based
|
|
indexes, <productname>PostgreSQL</productname> treats <literal>NaN</literal>
|
|
values as equal, and greater than all non-<literal>NaN</literal>
|
|
values.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
The types <type>decimal</type> and <type>numeric</type> are
|
|
equivalent. Both types are part of the <acronym>SQL</acronym>
|
|
standard.
|
|
</para>
|
|
|
|
<para>
|
|
When rounding values, the <type>numeric</type> type rounds ties away
|
|
from zero, while (on most machines) the <type>real</type>
|
|
and <type>double precision</type> types round ties to the nearest even
|
|
number. For example:
|
|
|
|
<programlisting>
|
|
SELECT x,
|
|
round(x::numeric) AS num_round,
|
|
round(x::double precision) AS dbl_round
|
|
FROM generate_series(-3.5, 3.5, 1) as x;
|
|
x | num_round | dbl_round
|
|
------+-----------+-----------
|
|
-3.5 | -4 | -4
|
|
-2.5 | -3 | -2
|
|
-1.5 | -2 | -2
|
|
-0.5 | -1 | -0
|
|
0.5 | 1 | 0
|
|
1.5 | 2 | 2
|
|
2.5 | 3 | 2
|
|
3.5 | 4 | 4
|
|
(8 rows)
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
|
|
<sect2 id="datatype-float">
|
|
<title>Floating-Point Types</title>
|
|
|
|
<indexterm zone="datatype-float">
|
|
<primary>real</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-float">
|
|
<primary>double precision</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>float4</primary>
|
|
<see>real</see>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>float8</primary>
|
|
<see>double precision</see>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-float">
|
|
<primary>floating point</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The data types <type>real</type> and <type>double precision</type> are
|
|
inexact, variable-precision numeric types. On all currently supported
|
|
platforms, these types are implementations of <acronym>IEEE</acronym>
|
|
Standard 754 for Binary Floating-Point Arithmetic (single and double
|
|
precision, respectively), to the extent that the underlying processor,
|
|
operating system, and compiler support it.
|
|
</para>
|
|
|
|
<para>
|
|
Inexact means that some values cannot be converted exactly to the
|
|
internal format and are stored as approximations, so that storing
|
|
and retrieving a value might show slight discrepancies.
|
|
Managing these errors and how they propagate through calculations
|
|
is the subject of an entire branch of mathematics and computer
|
|
science and will not be discussed here, except for the
|
|
following points:
|
|
<itemizedlist>
|
|
<listitem>
|
|
<para>
|
|
If you require exact storage and calculations (such as for
|
|
monetary amounts), use the <type>numeric</type> type instead.
|
|
</para>
|
|
</listitem>
|
|
|
|
<listitem>
|
|
<para>
|
|
If you want to do complicated calculations with these types
|
|
for anything important, especially if you rely on certain
|
|
behavior in boundary cases (infinity, underflow), you should
|
|
evaluate the implementation carefully.
|
|
</para>
|
|
</listitem>
|
|
|
|
<listitem>
|
|
<para>
|
|
Comparing two floating-point values for equality might not
|
|
always work as expected.
|
|
</para>
|
|
</listitem>
|
|
</itemizedlist>
|
|
</para>
|
|
|
|
<para>
|
|
On all currently supported platforms, the <type>real</type> type has a
|
|
range of around 1E-37 to 1E+37 with a precision of at least 6 decimal
|
|
digits. The <type>double precision</type> type has a range of around
|
|
1E-307 to 1E+308 with a precision of at least 15 digits. Values that are
|
|
too large or too small will cause an error. Rounding might take place if
|
|
the precision of an input number is too high. Numbers too close to zero
|
|
that are not representable as distinct from zero will cause an underflow
|
|
error.
|
|
</para>
|
|
|
|
<para>
|
|
By default, floating point values are output in text form in their
|
|
shortest precise decimal representation; the decimal value produced is
|
|
closer to the true stored binary value than to any other value
|
|
representable in the same binary precision. (However, the output value is
|
|
currently never <emphasis>exactly</emphasis> midway between two
|
|
representable values, in order to avoid a widespread bug where input
|
|
routines do not properly respect the round-to-nearest-even rule.) This value will
|
|
use at most 17 significant decimal digits for <type>float8</type>
|
|
values, and at most 9 digits for <type>float4</type> values.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
This shortest-precise output format is much faster to generate than the
|
|
historical rounded format.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
For compatibility with output generated by older versions
|
|
of <productname>PostgreSQL</productname>, and to allow the output
|
|
precision to be reduced, the <xref linkend="guc-extra-float-digits"/>
|
|
parameter can be used to select rounded decimal output instead. Setting a
|
|
value of 0 restores the previous default of rounding the value to 6
|
|
(for <type>float4</type>) or 15 (for <type>float8</type>)
|
|
significant decimal digits. Setting a negative value reduces the number
|
|
of digits further; for example -2 would round output to 4 or 13 digits
|
|
respectively.
|
|
</para>
|
|
|
|
<para>
|
|
Any value of <xref linkend="guc-extra-float-digits"/> greater than 0
|
|
selects the shortest-precise format.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
Applications that wanted precise values have historically had to set
|
|
<xref linkend="guc-extra-float-digits"/> to 3 to obtain them. For
|
|
maximum compatibility between versions, they should continue to do so.
|
|
</para>
|
|
</note>
|
|
|
|
<indexterm>
|
|
<primary>infinity</primary>
|
|
<secondary>floating point</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>not a number</primary>
|
|
<secondary>floating point</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
In addition to ordinary numeric values, the floating-point types
|
|
have several special values:
|
|
<literallayout>
|
|
<literal>Infinity</literal>
|
|
<literal>-Infinity</literal>
|
|
<literal>NaN</literal>
|
|
</literallayout>
|
|
These represent the IEEE 754 special values
|
|
<quote>infinity</quote>, <quote>negative infinity</quote>, and
|
|
<quote>not-a-number</quote>, respectively. When writing these values
|
|
as constants in an SQL command, you must put quotes around them,
|
|
for example <literal>UPDATE table SET x = '-Infinity'</literal>. On input,
|
|
these strings are recognized in a case-insensitive manner.
|
|
The infinity values can alternatively be spelled <literal>inf</literal>
|
|
and <literal>-inf</literal>.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
IEEE 754 specifies that <literal>NaN</literal> should not compare equal
|
|
to any other floating-point value (including <literal>NaN</literal>).
|
|
In order to allow floating-point values to be sorted and used
|
|
in tree-based indexes, <productname>PostgreSQL</productname> treats
|
|
<literal>NaN</literal> values as equal, and greater than all
|
|
non-<literal>NaN</literal> values.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> also supports the SQL-standard
|
|
notations <type>float</type> and
|
|
<type>float(<replaceable>p</replaceable>)</type> for specifying
|
|
inexact numeric types. Here, <replaceable>p</replaceable> specifies
|
|
the minimum acceptable precision in <emphasis>binary</emphasis> digits.
|
|
<productname>PostgreSQL</productname> accepts
|
|
<type>float(1)</type> to <type>float(24)</type> as selecting the
|
|
<type>real</type> type, while
|
|
<type>float(25)</type> to <type>float(53)</type> select
|
|
<type>double precision</type>. Values of <replaceable>p</replaceable>
|
|
outside the allowed range draw an error.
|
|
<type>float</type> with no precision specified is taken to mean
|
|
<type>double precision</type>.
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-serial">
|
|
<title>Serial Types</title>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>smallserial</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>serial</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>bigserial</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>serial2</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>serial4</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-serial">
|
|
<primary>serial8</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>auto-increment</primary>
|
|
<see>serial</see>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>sequence</primary>
|
|
<secondary>and serial type</secondary>
|
|
</indexterm>
|
|
|
|
<note>
|
|
<para>
|
|
This section describes a PostgreSQL-specific way to create an
|
|
autoincrementing column. Another way is to use the SQL-standard
|
|
identity column feature, described at <xref linkend="ddl-identity-columns"/>.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
The data types <type>smallserial</type>, <type>serial</type> and
|
|
<type>bigserial</type> are not true types, but merely
|
|
a notational convenience for creating unique identifier columns
|
|
(similar to the <literal>AUTO_INCREMENT</literal> property
|
|
supported by some other databases). In the current
|
|
implementation, specifying:
|
|
|
|
<programlisting>
|
|
CREATE TABLE <replaceable class="parameter">tablename</replaceable> (
|
|
<replaceable class="parameter">colname</replaceable> SERIAL
|
|
);
|
|
</programlisting>
|
|
|
|
is equivalent to specifying:
|
|
|
|
<programlisting>
|
|
CREATE SEQUENCE <replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq AS integer;
|
|
CREATE TABLE <replaceable class="parameter">tablename</replaceable> (
|
|
<replaceable class="parameter">colname</replaceable> integer NOT NULL DEFAULT nextval('<replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq')
|
|
);
|
|
ALTER SEQUENCE <replaceable class="parameter">tablename</replaceable>_<replaceable class="parameter">colname</replaceable>_seq OWNED BY <replaceable class="parameter">tablename</replaceable>.<replaceable class="parameter">colname</replaceable>;
|
|
</programlisting>
|
|
|
|
Thus, we have created an integer column and arranged for its default
|
|
values to be assigned from a sequence generator. A <literal>NOT NULL</literal>
|
|
constraint is applied to ensure that a null value cannot be
|
|
inserted. (In most cases you would also want to attach a
|
|
<literal>UNIQUE</literal> or <literal>PRIMARY KEY</literal> constraint to prevent
|
|
duplicate values from being inserted by accident, but this is
|
|
not automatic.) Lastly, the sequence is marked as <quote>owned by</quote>
|
|
the column, so that it will be dropped if the column or table is dropped.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
Because <type>smallserial</type>, <type>serial</type> and
|
|
<type>bigserial</type> are implemented using sequences, there may
|
|
be "holes" or gaps in the sequence of values which appears in the
|
|
column, even if no rows are ever deleted. A value allocated
|
|
from the sequence is still "used up" even if a row containing that
|
|
value is never successfully inserted into the table column. This
|
|
may happen, for example, if the inserting transaction rolls back.
|
|
See <literal>nextval()</literal> in <xref linkend="functions-sequence"/>
|
|
for details.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
To insert the next value of the sequence into the <type>serial</type>
|
|
column, specify that the <type>serial</type>
|
|
column should be assigned its default value. This can be done
|
|
either by excluding the column from the list of columns in
|
|
the <command>INSERT</command> statement, or through the use of
|
|
the <literal>DEFAULT</literal> key word.
|
|
</para>
|
|
|
|
<para>
|
|
The type names <type>serial</type> and <type>serial4</type> are
|
|
equivalent: both create <type>integer</type> columns. The type
|
|
names <type>bigserial</type> and <type>serial8</type> work
|
|
the same way, except that they create a <type>bigint</type>
|
|
column. <type>bigserial</type> should be used if you anticipate
|
|
the use of more than 2<superscript>31</superscript> identifiers over the
|
|
lifetime of the table. The type names <type>smallserial</type> and
|
|
<type>serial2</type> also work the same way, except that they
|
|
create a <type>smallint</type> column.
|
|
</para>
|
|
|
|
<para>
|
|
The sequence created for a <type>serial</type> column is
|
|
automatically dropped when the owning column is dropped.
|
|
You can drop the sequence without dropping the column, but this
|
|
will force removal of the column default expression.
|
|
</para>
|
|
</sect2>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-money">
|
|
<title>Monetary Types</title>
|
|
|
|
<para>
|
|
The <type>money</type> type stores a currency amount with a fixed
|
|
fractional precision; see <xref
|
|
linkend="datatype-money-table"/>. The fractional precision is
|
|
determined by the database's <xref linkend="guc-lc-monetary"/> setting.
|
|
The range shown in the table assumes there are two fractional digits.
|
|
Input is accepted in a variety of formats, including integer and
|
|
floating-point literals, as well as typical
|
|
currency formatting, such as <literal>'$1,000.00'</literal>.
|
|
Output is generally in the latter form but depends on the locale.
|
|
</para>
|
|
|
|
<table id="datatype-money-table">
|
|
<title>Monetary Types</title>
|
|
<tgroup cols="4">
|
|
<colspec colname="col1" colwidth="2*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<colspec colname="col4" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
<entry>Range</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>money</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>currency amount</entry>
|
|
<entry>-92233720368547758.08 to +92233720368547758.07</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
Since the output of this data type is locale-sensitive, it might not
|
|
work to load <type>money</type> data into a database that has a different
|
|
setting of <varname>lc_monetary</varname>. To avoid problems, before
|
|
restoring a dump into a new database make sure <varname>lc_monetary</varname> has
|
|
the same or equivalent value as in the database that was dumped.
|
|
</para>
|
|
|
|
<para>
|
|
Values of the <type>numeric</type>, <type>int</type>, and
|
|
<type>bigint</type> data types can be cast to <type>money</type>.
|
|
Conversion from the <type>real</type> and <type>double precision</type>
|
|
data types can be done by casting to <type>numeric</type> first, for
|
|
example:
|
|
<programlisting>
|
|
SELECT '12.34'::float8::numeric::money;
|
|
</programlisting>
|
|
However, this is not recommended. Floating point numbers should not be
|
|
used to handle money due to the potential for rounding errors.
|
|
</para>
|
|
|
|
<para>
|
|
A <type>money</type> value can be cast to <type>numeric</type> without
|
|
loss of precision. Conversion to other types could potentially lose
|
|
precision, and must also be done in two stages:
|
|
<programlisting>
|
|
SELECT '52093.89'::money::numeric::float8;
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
Division of a <type>money</type> value by an integer value is performed
|
|
with truncation of the fractional part towards zero. To get a rounded
|
|
result, divide by a floating-point value, or cast the <type>money</type>
|
|
value to <type>numeric</type> before dividing and back to <type>money</type>
|
|
afterwards. (The latter is preferable to avoid risking precision loss.)
|
|
When a <type>money</type> value is divided by another <type>money</type>
|
|
value, the result is <type>double precision</type> (i.e., a pure number,
|
|
not money); the currency units cancel each other out in the division.
|
|
</para>
|
|
</sect1>
|
|
|
|
|
|
<sect1 id="datatype-character">
|
|
<title>Character Types</title>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>character string</primary>
|
|
<secondary>data types</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>string</primary>
|
|
<see>character string</see>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>character</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>character varying</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>text</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>char</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>varchar</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-character">
|
|
<primary>bpchar</primary>
|
|
</indexterm>
|
|
|
|
<table id="datatype-character-table">
|
|
<title>Character Types</title>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>character varying(<replaceable>n</replaceable>)</type>, <type>varchar(<replaceable>n</replaceable>)</type></entry>
|
|
<entry>variable-length with limit</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>character(<replaceable>n</replaceable>)</type>, <type>char(<replaceable>n</replaceable>)</type>, <type>bpchar(<replaceable>n</replaceable>)</type></entry>
|
|
<entry>fixed-length, blank-padded</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>bpchar</type></entry>
|
|
<entry>variable unlimited length, blank-trimmed</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>text</type></entry>
|
|
<entry>variable unlimited length</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
<xref linkend="datatype-character-table"/> shows the
|
|
general-purpose character types available in
|
|
<productname>PostgreSQL</productname>.
|
|
</para>
|
|
|
|
<para>
|
|
<acronym>SQL</acronym> defines two primary character types:
|
|
<type>character varying(<replaceable>n</replaceable>)</type> and
|
|
<type>character(<replaceable>n</replaceable>)</type>, where <replaceable>n</replaceable>
|
|
is a positive integer. Both of these types can store strings up to
|
|
<replaceable>n</replaceable> characters (not bytes) in length. An attempt to store a
|
|
longer string into a column of these types will result in an
|
|
error, unless the excess characters are all spaces, in which case
|
|
the string will be truncated to the maximum length. (This somewhat
|
|
bizarre exception is required by the <acronym>SQL</acronym>
|
|
standard.)
|
|
However, if one explicitly casts a value to <type>character
|
|
varying(<replaceable>n</replaceable>)</type> or
|
|
<type>character(<replaceable>n</replaceable>)</type>, then an over-length
|
|
value will be truncated to <replaceable>n</replaceable> characters without
|
|
raising an error. (This too is required by the
|
|
<acronym>SQL</acronym> standard.)
|
|
If the string to be stored is shorter than the declared
|
|
length, values of type <type>character</type> will be space-padded;
|
|
values of type <type>character varying</type> will simply store the
|
|
shorter
|
|
string.
|
|
</para>
|
|
|
|
<para>
|
|
In addition, <productname>PostgreSQL</productname> provides the
|
|
<type>text</type> type, which stores strings of any length.
|
|
Although the <type>text</type> type is not in the
|
|
<acronym>SQL</acronym> standard, several other SQL database
|
|
management systems have it as well.
|
|
<type>text</type> is <productname>PostgreSQL</productname>'s native
|
|
string data type, in that most built-in functions operating on strings
|
|
are declared to take or return <type>text</type> not <type>character
|
|
varying</type>. For many purposes, <type>character varying</type>
|
|
acts as though it were a <link linkend="domains">domain</link>
|
|
over <type>text</type>.
|
|
</para>
|
|
|
|
<para>
|
|
The type name <type>varchar</type> is an alias for <type>character
|
|
varying</type>, while <type>bpchar</type> (with length specifier) and
|
|
<type>char</type> are aliases for <type>character</type>. The
|
|
<type>varchar</type> and <type>char</type> aliases are defined in the
|
|
<acronym>SQL</acronym> standard; <type>bpchar</type> is a
|
|
<productname>PostgreSQL</productname> extension.
|
|
</para>
|
|
|
|
<para>
|
|
If specified, the length <replaceable>n</replaceable> must be greater
|
|
than zero and cannot exceed 10,485,760. If <type>character
|
|
varying</type> (or <type>varchar</type>) is used without
|
|
length specifier, the type accepts strings of any length. If
|
|
<type>bpchar</type> lacks a length specifier, it also accepts strings
|
|
of any length, but trailing spaces are semantically insignificant.
|
|
If <type>character</type> (or <type>char</type>) lacks a specifier,
|
|
it is equivalent to <type>character(1)</type>.
|
|
</para>
|
|
|
|
<para>
|
|
Values of type <type>character</type> are physically padded
|
|
with spaces to the specified width <replaceable>n</replaceable>, and are
|
|
stored and displayed that way. However, trailing spaces are treated as
|
|
semantically insignificant and disregarded when comparing two values
|
|
of type <type>character</type>. In collations where whitespace
|
|
is significant, this behavior can produce unexpected results;
|
|
for example <command>SELECT 'a '::CHAR(2) collate "C" <
|
|
E'a\n'::CHAR(2)</command> returns true, even though <literal>C</literal>
|
|
locale would consider a space to be greater than a newline.
|
|
Trailing spaces are removed when converting a <type>character</type> value
|
|
to one of the other string types. Note that trailing spaces
|
|
<emphasis>are</emphasis> semantically significant in
|
|
<type>character varying</type> and <type>text</type> values, and
|
|
when using pattern matching, that is <literal>LIKE</literal> and
|
|
regular expressions.
|
|
</para>
|
|
|
|
<para>
|
|
The characters that can be stored in any of these data types are
|
|
determined by the database character set, which is selected when
|
|
the database is created. Regardless of the specific character set,
|
|
the character with code zero (sometimes called NUL) cannot be stored.
|
|
For more information refer to <xref linkend="multibyte"/>.
|
|
</para>
|
|
|
|
<para>
|
|
The storage requirement for a short string (up to 126 bytes) is 1 byte
|
|
plus the actual string, which includes the space padding in the case of
|
|
<type>character</type>. Longer strings have 4 bytes of overhead instead
|
|
of 1. Long strings are compressed by the system automatically, so
|
|
the physical requirement on disk might be less. Very long values are also
|
|
stored in background tables so that they do not interfere with rapid
|
|
access to shorter column values. In any case, the longest
|
|
possible character string that can be stored is about 1 GB. (The
|
|
maximum value that will be allowed for <replaceable>n</replaceable> in the data
|
|
type declaration is less than that. It wouldn't be useful to
|
|
change this because with multibyte character encodings the number of
|
|
characters and bytes can be quite different. If you desire to
|
|
store long strings with no specific upper limit, use
|
|
<type>text</type> or <type>character varying</type> without a length
|
|
specifier, rather than making up an arbitrary length limit.)
|
|
</para>
|
|
|
|
<tip>
|
|
<para>
|
|
There is no performance difference among these three types,
|
|
apart from increased storage space when using the blank-padded
|
|
type, and a few extra CPU cycles to check the length when storing into
|
|
a length-constrained column. While
|
|
<type>character(<replaceable>n</replaceable>)</type> has performance
|
|
advantages in some other database systems, there is no such advantage in
|
|
<productname>PostgreSQL</productname>; in fact
|
|
<type>character(<replaceable>n</replaceable>)</type> is usually the slowest of
|
|
the three because of its additional storage costs. In most situations
|
|
<type>text</type> or <type>character varying</type> should be used
|
|
instead.
|
|
</para>
|
|
</tip>
|
|
|
|
<para>
|
|
Refer to <xref linkend="sql-syntax-strings"/> for information about
|
|
the syntax of string literals, and to <xref linkend="functions"/>
|
|
for information about available operators and functions.
|
|
</para>
|
|
|
|
<example>
|
|
<title>Using the Character Types</title>
|
|
|
|
<programlisting>
|
|
CREATE TABLE test1 (a character(4));
|
|
INSERT INTO test1 VALUES ('ok');
|
|
SELECT a, char_length(a) FROM test1; -- <co id="co.datatype-char"/>
|
|
<computeroutput>
|
|
a | char_length
|
|
------+-------------
|
|
ok | 2
|
|
</computeroutput>
|
|
|
|
CREATE TABLE test2 (b varchar(5));
|
|
INSERT INTO test2 VALUES ('ok');
|
|
INSERT INTO test2 VALUES ('good ');
|
|
INSERT INTO test2 VALUES ('too long');
|
|
<computeroutput>ERROR: value too long for type character varying(5)</computeroutput>
|
|
INSERT INTO test2 VALUES ('too long'::varchar(5)); -- explicit truncation
|
|
SELECT b, char_length(b) FROM test2;
|
|
<computeroutput>
|
|
b | char_length
|
|
-------+-------------
|
|
ok | 2
|
|
good | 5
|
|
too l | 5
|
|
</computeroutput>
|
|
</programlisting>
|
|
<calloutlist>
|
|
<callout arearefs="co.datatype-char">
|
|
<para>
|
|
The <function>char_length</function> function is discussed in
|
|
<xref linkend="functions-string"/>.
|
|
</para>
|
|
</callout>
|
|
</calloutlist>
|
|
</example>
|
|
|
|
<para>
|
|
There are two other fixed-length character types in
|
|
<productname>PostgreSQL</productname>, shown in <xref
|
|
linkend="datatype-character-special-table"/>.
|
|
These are not intended for general-purpose use, only for use
|
|
in the internal system catalogs.
|
|
The <type>name</type> type is used to store identifiers. Its
|
|
length is currently defined as 64 bytes (63 usable characters plus
|
|
terminator) but should be referenced using the constant
|
|
<symbol>NAMEDATALEN</symbol> in <literal>C</literal> source code.
|
|
The length is set at compile time (and
|
|
is therefore adjustable for special uses); the default maximum
|
|
length might change in a future release. The type <type>"char"</type>
|
|
(note the quotes) is different from <type>char(1)</type> in that it
|
|
only uses one byte of storage, and therefore can store only a single
|
|
ASCII character. It is used in the system
|
|
catalogs as a simplistic enumeration type.
|
|
</para>
|
|
|
|
<table id="datatype-character-special-table">
|
|
<title>Special Character Types</title>
|
|
<tgroup cols="3">
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>"char"</type></entry>
|
|
<entry>1 byte</entry>
|
|
<entry>single-byte internal type</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>name</type></entry>
|
|
<entry>64 bytes</entry>
|
|
<entry>internal type for object names</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-binary">
|
|
<title>Binary Data Types</title>
|
|
|
|
<indexterm zone="datatype-binary">
|
|
<primary>binary data</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-binary">
|
|
<primary>bytea</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>bytea</type> data type allows storage of binary strings;
|
|
see <xref linkend="datatype-binary-table"/>.
|
|
</para>
|
|
|
|
<table id="datatype-binary-table">
|
|
<title>Binary Data Types</title>
|
|
<tgroup cols="3">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="3*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>bytea</type></entry>
|
|
<entry>1 or 4 bytes plus the actual binary string</entry>
|
|
<entry>variable-length binary string</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
A binary string is a sequence of octets (or bytes). Binary
|
|
strings are distinguished from character strings in two
|
|
ways. First, binary strings specifically allow storing
|
|
octets of value zero and other <quote>non-printable</quote>
|
|
octets (usually, octets outside the decimal range 32 to 126).
|
|
Character strings disallow zero octets, and also disallow any
|
|
other octet values and sequences of octet values that are invalid
|
|
according to the database's selected character set encoding.
|
|
Second, operations on binary strings process the actual bytes,
|
|
whereas the processing of character strings depends on locale settings.
|
|
In short, binary strings are appropriate for storing data that the
|
|
programmer thinks of as <quote>raw bytes</quote>, whereas character
|
|
strings are appropriate for storing text.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>bytea</type> type supports two
|
|
formats for input and output: <quote>hex</quote> format
|
|
and <productname>PostgreSQL</productname>'s historical
|
|
<quote>escape</quote> format. Both
|
|
of these are always accepted on input. The output format depends
|
|
on the configuration parameter <xref linkend="guc-bytea-output"/>;
|
|
the default is hex. (Note that the hex format was introduced in
|
|
<productname>PostgreSQL</productname> 9.0; earlier versions and some
|
|
tools don't understand it.)
|
|
</para>
|
|
|
|
<para>
|
|
The <acronym>SQL</acronym> standard defines a different binary
|
|
string type, called <type>BLOB</type> or <type>BINARY LARGE
|
|
OBJECT</type>. The input format is different from
|
|
<type>bytea</type>, but the provided functions and operators are
|
|
mostly the same.
|
|
</para>
|
|
|
|
<sect2 id="datatype-binary-bytea-hex-format">
|
|
<title><type>bytea</type> Hex Format</title>
|
|
|
|
<para>
|
|
The <quote>hex</quote> format encodes binary data as 2 hexadecimal digits
|
|
per byte, most significant nibble first. The entire string is
|
|
preceded by the sequence <literal>\x</literal> (to distinguish it
|
|
from the escape format). In some contexts, the initial backslash may
|
|
need to be escaped by doubling it
|
|
(see <xref linkend="sql-syntax-strings"/>).
|
|
For input, the hexadecimal digits can
|
|
be either upper or lower case, and whitespace is permitted between
|
|
digit pairs (but not within a digit pair nor in the starting
|
|
<literal>\x</literal> sequence).
|
|
The hex format is compatible with a wide
|
|
range of external applications and protocols, and it tends to be
|
|
faster to convert than the escape format, so its use is preferred.
|
|
</para>
|
|
|
|
<para>
|
|
Example:
|
|
<programlisting>
|
|
SET bytea_output = 'hex';
|
|
|
|
SELECT '\xDEADBEEF'::bytea;
|
|
bytea
|
|
------------
|
|
\xdeadbeef
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-binary-bytea-escape-format">
|
|
<title><type>bytea</type> Escape Format</title>
|
|
|
|
<para>
|
|
The <quote>escape</quote> format is the traditional
|
|
<productname>PostgreSQL</productname> format for the <type>bytea</type>
|
|
type. It
|
|
takes the approach of representing a binary string as a sequence
|
|
of ASCII characters, while converting those bytes that cannot be
|
|
represented as an ASCII character into special escape sequences.
|
|
If, from the point of view of the application, representing bytes
|
|
as characters makes sense, then this representation can be
|
|
convenient. But in practice it is usually confusing because it
|
|
fuzzes up the distinction between binary strings and character
|
|
strings, and also the particular escape mechanism that was chosen is
|
|
somewhat unwieldy. Therefore, this format should probably be avoided
|
|
for most new applications.
|
|
</para>
|
|
|
|
<para>
|
|
When entering <type>bytea</type> values in escape format,
|
|
octets of certain
|
|
values <emphasis>must</emphasis> be escaped, while all octet
|
|
values <emphasis>can</emphasis> be escaped. In
|
|
general, to escape an octet, convert it into its three-digit
|
|
octal value and precede it by a backslash.
|
|
Backslash itself (octet decimal value 92) can alternatively be represented by
|
|
double backslashes.
|
|
<xref linkend="datatype-binary-sqlesc"/>
|
|
shows the characters that must be escaped, and gives the alternative
|
|
escape sequences where applicable.
|
|
</para>
|
|
|
|
<table id="datatype-binary-sqlesc">
|
|
<title><type>bytea</type> Literal Escaped Octets</title>
|
|
<tgroup cols="5">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="1*"/>
|
|
<colspec colname="col4" colwidth="1.25*"/>
|
|
<colspec colname="col5" colwidth="1*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Decimal Octet Value</entry>
|
|
<entry>Description</entry>
|
|
<entry>Escaped Input Representation</entry>
|
|
<entry>Example</entry>
|
|
<entry>Hex Representation</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry>0</entry>
|
|
<entry>zero octet</entry>
|
|
<entry><literal>'\000'</literal></entry>
|
|
<entry><literal>'\000'::bytea</literal></entry>
|
|
<entry><literal>\x00</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry>39</entry>
|
|
<entry>single quote</entry>
|
|
<entry><literal>''''</literal> or <literal>'\047'</literal></entry>
|
|
<entry><literal>''''::bytea</literal></entry>
|
|
<entry><literal>\x27</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry>92</entry>
|
|
<entry>backslash</entry>
|
|
<entry><literal>'\\'</literal> or <literal>'\134'</literal></entry>
|
|
<entry><literal>'\\'::bytea</literal></entry>
|
|
<entry><literal>\x5c</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry>0 to 31 and 127 to 255</entry>
|
|
<entry><quote>non-printable</quote> octets</entry>
|
|
<entry><literal>'\<replaceable>xxx'</replaceable></literal> (octal value)</entry>
|
|
<entry><literal>'\001'::bytea</literal></entry>
|
|
<entry><literal>\x01</literal></entry>
|
|
</row>
|
|
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
The requirement to escape <emphasis>non-printable</emphasis> octets
|
|
varies depending on locale settings. In some instances you can get away
|
|
with leaving them unescaped.
|
|
</para>
|
|
|
|
<para>
|
|
The reason that single quotes must be doubled, as shown
|
|
in <xref linkend="datatype-binary-sqlesc"/>, is that this
|
|
is true for any string literal in an SQL command. The generic
|
|
string-literal parser consumes the outermost single quotes
|
|
and reduces any pair of single quotes to one data character.
|
|
What the <type>bytea</type> input function sees is just one
|
|
single quote, which it treats as a plain data character.
|
|
However, the <type>bytea</type> input function treats
|
|
backslashes as special, and the other behaviors shown in
|
|
<xref linkend="datatype-binary-sqlesc"/> are implemented by
|
|
that function.
|
|
</para>
|
|
|
|
<para>
|
|
In some contexts, backslashes must be doubled compared to what is
|
|
shown above, because the generic string-literal parser will also
|
|
reduce pairs of backslashes to one data character;
|
|
see <xref linkend="sql-syntax-strings"/>.
|
|
</para>
|
|
|
|
<para>
|
|
<type>Bytea</type> octets are output in <literal>hex</literal>
|
|
format by default. If you change <xref linkend="guc-bytea-output"/>
|
|
to <literal>escape</literal>,
|
|
<quote>non-printable</quote> octets are converted to their
|
|
equivalent three-digit octal value and preceded by one backslash.
|
|
Most <quote>printable</quote> octets are output by their standard
|
|
representation in the client character set, e.g.:
|
|
|
|
<programlisting>
|
|
SET bytea_output = 'escape';
|
|
|
|
SELECT 'abc \153\154\155 \052\251\124'::bytea;
|
|
bytea
|
|
----------------
|
|
abc klm *\251T
|
|
</programlisting>
|
|
|
|
The octet with decimal value 92 (backslash) is doubled in the output.
|
|
Details are in <xref linkend="datatype-binary-resesc"/>.
|
|
</para>
|
|
|
|
<table id="datatype-binary-resesc">
|
|
<title><type>bytea</type> Output Escaped Octets</title>
|
|
<tgroup cols="5">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="1*"/>
|
|
<colspec colname="col4" colwidth="1.25*"/>
|
|
<colspec colname="col5" colwidth="1*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Decimal Octet Value</entry>
|
|
<entry>Description</entry>
|
|
<entry>Escaped Output Representation</entry>
|
|
<entry>Example</entry>
|
|
<entry>Output Result</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
|
|
<row>
|
|
<entry>92</entry>
|
|
<entry>backslash</entry>
|
|
<entry><literal>\\</literal></entry>
|
|
<entry><literal>'\134'::bytea</literal></entry>
|
|
<entry><literal>\\</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry>0 to 31 and 127 to 255</entry>
|
|
<entry><quote>non-printable</quote> octets</entry>
|
|
<entry><literal>\<replaceable>xxx</replaceable></literal> (octal value)</entry>
|
|
<entry><literal>'\001'::bytea</literal></entry>
|
|
<entry><literal>\001</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry>32 to 126</entry>
|
|
<entry><quote>printable</quote> octets</entry>
|
|
<entry>client character set representation</entry>
|
|
<entry><literal>'\176'::bytea</literal></entry>
|
|
<entry><literal>~</literal></entry>
|
|
</row>
|
|
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
Depending on the front end to <productname>PostgreSQL</productname> you use,
|
|
you might have additional work to do in terms of escaping and
|
|
unescaping <type>bytea</type> strings. For example, you might also
|
|
have to escape line feeds and carriage returns if your interface
|
|
automatically translates these.
|
|
</para>
|
|
</sect2>
|
|
</sect1>
|
|
|
|
|
|
<sect1 id="datatype-datetime">
|
|
<title>Date/Time Types</title>
|
|
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>date</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>time</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>time without time zone</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>time with time zone</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>timestamp</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>timestamptz</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>timestamp with time zone</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>timestamp without time zone</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>interval</primary>
|
|
</indexterm>
|
|
<indexterm zone="datatype-datetime">
|
|
<primary>time span</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> supports the full set of
|
|
<acronym>SQL</acronym> date and time types, shown in <xref
|
|
linkend="datatype-datetime-table"/>. The operations available
|
|
on these data types are described in
|
|
<xref linkend="functions-datetime"/>.
|
|
Dates are counted according to the Gregorian calendar, even in
|
|
years before that calendar was introduced (see <xref
|
|
linkend="datetime-units-history"/> for more information).
|
|
</para>
|
|
|
|
<table id="datatype-datetime-table">
|
|
<title>Date/Time Types</title>
|
|
<tgroup cols="6">
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
<entry>Low Value</entry>
|
|
<entry>High Value</entry>
|
|
<entry>Resolution</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>timestamp [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>both date and time (no time zone)</entry>
|
|
<entry>4713 BC</entry>
|
|
<entry>294276 AD</entry>
|
|
<entry>1 microsecond</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>timestamp [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>both date and time, with time zone</entry>
|
|
<entry>4713 BC</entry>
|
|
<entry>294276 AD</entry>
|
|
<entry>1 microsecond</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>date</type></entry>
|
|
<entry>4 bytes</entry>
|
|
<entry>date (no time of day)</entry>
|
|
<entry>4713 BC</entry>
|
|
<entry>5874897 AD</entry>
|
|
<entry>1 day</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>time [ (<replaceable>p</replaceable>) ] [ without time zone ]</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>time of day (no date)</entry>
|
|
<entry>00:00:00</entry>
|
|
<entry>24:00:00</entry>
|
|
<entry>1 microsecond</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>time [ (<replaceable>p</replaceable>) ] with time zone</type></entry>
|
|
<entry>12 bytes</entry>
|
|
<entry>time of day (no date), with time zone</entry>
|
|
<!-- see MAX_TZDISP_HOUR in datatype/timestamp.h -->
|
|
<entry>00:00:00+1559</entry>
|
|
<entry>24:00:00-1559</entry>
|
|
<entry>1 microsecond</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>interval [ <replaceable>fields</replaceable> ] [ (<replaceable>p</replaceable>) ]</type></entry>
|
|
<entry>16 bytes</entry>
|
|
<entry>time interval</entry>
|
|
<entry>-178000000 years</entry>
|
|
<entry>178000000 years</entry>
|
|
<entry>1 microsecond</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<note>
|
|
<para>
|
|
The SQL standard requires that writing just <type>timestamp</type>
|
|
be equivalent to <type>timestamp without time
|
|
zone</type>, and <productname>PostgreSQL</productname> honors that
|
|
behavior. <type>timestamptz</type> is accepted as an
|
|
abbreviation for <type>timestamp with time zone</type>; this is a
|
|
<productname>PostgreSQL</productname> extension.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
<type>time</type>, <type>timestamp</type>, and
|
|
<type>interval</type> accept an optional precision value
|
|
<replaceable>p</replaceable> which specifies the number of
|
|
fractional digits retained in the seconds field. By default, there
|
|
is no explicit bound on precision. The allowed range of
|
|
<replaceable>p</replaceable> is from 0 to 6.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>interval</type> type has an additional option, which is
|
|
to restrict the set of stored fields by writing one of these phrases:
|
|
<literallayout class="monospaced">
|
|
YEAR
|
|
MONTH
|
|
DAY
|
|
HOUR
|
|
MINUTE
|
|
SECOND
|
|
YEAR TO MONTH
|
|
DAY TO HOUR
|
|
DAY TO MINUTE
|
|
DAY TO SECOND
|
|
HOUR TO MINUTE
|
|
HOUR TO SECOND
|
|
MINUTE TO SECOND
|
|
</literallayout>
|
|
Note that if both <replaceable>fields</replaceable> and
|
|
<replaceable>p</replaceable> are specified, the
|
|
<replaceable>fields</replaceable> must include <literal>SECOND</literal>,
|
|
since the precision applies only to the seconds.
|
|
</para>
|
|
|
|
<para>
|
|
The type <type>time with time zone</type> is defined by the SQL
|
|
standard, but the definition exhibits properties which lead to
|
|
questionable usefulness. In most cases, a combination of
|
|
<type>date</type>, <type>time</type>, <type>timestamp without time
|
|
zone</type>, and <type>timestamp with time zone</type> should
|
|
provide a complete range of date/time functionality required by
|
|
any application.
|
|
</para>
|
|
|
|
<sect2 id="datatype-datetime-input">
|
|
<title>Date/Time Input</title>
|
|
|
|
<para>
|
|
Date and time input is accepted in almost any reasonable format, including
|
|
ISO 8601, <acronym>SQL</acronym>-compatible,
|
|
traditional <productname>POSTGRES</productname>, and others.
|
|
For some formats, ordering of day, month, and year in date input is
|
|
ambiguous and there is support for specifying the expected
|
|
ordering of these fields. Set the <xref linkend="guc-datestyle"/> parameter
|
|
to <literal>MDY</literal> to select month-day-year interpretation,
|
|
<literal>DMY</literal> to select day-month-year interpretation, or
|
|
<literal>YMD</literal> to select year-month-day interpretation.
|
|
</para>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> is more flexible in
|
|
handling date/time input than the
|
|
<acronym>SQL</acronym> standard requires.
|
|
See <xref linkend="datetime-appendix"/>
|
|
for the exact parsing rules of date/time input and for the
|
|
recognized text fields including months, days of the week, and
|
|
time zones.
|
|
</para>
|
|
|
|
<para>
|
|
Remember that any date or time literal input needs to be enclosed
|
|
in single quotes, like text strings. Refer to
|
|
<xref linkend="sql-syntax-constants-generic"/> for more
|
|
information.
|
|
<acronym>SQL</acronym> requires the following syntax
|
|
<synopsis>
|
|
<replaceable>type</replaceable> [ (<replaceable>p</replaceable>) ] '<replaceable>value</replaceable>'
|
|
</synopsis>
|
|
where <replaceable>p</replaceable> is an optional precision
|
|
specification giving the number of
|
|
fractional digits in the seconds field. Precision can be
|
|
specified for <type>time</type>, <type>timestamp</type>, and
|
|
<type>interval</type> types, and can range from 0 to 6.
|
|
If no precision is specified in a constant specification,
|
|
it defaults to the precision of the literal value (but not
|
|
more than 6 digits).
|
|
</para>
|
|
|
|
<sect3 id="datatype-datetime-input-dates">
|
|
<title>Dates</title>
|
|
|
|
<indexterm>
|
|
<primary>date</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<xref linkend="datatype-datetime-date-table"/> shows some possible
|
|
inputs for the <type>date</type> type.
|
|
</para>
|
|
|
|
<table id="datatype-datetime-date-table">
|
|
<title>Date Input</title>
|
|
<tgroup cols="2">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Example</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry>1999-01-08</entry>
|
|
<entry>ISO 8601; January 8 in any mode
|
|
(recommended format)</entry>
|
|
</row>
|
|
<row>
|
|
<entry>January 8, 1999</entry>
|
|
<entry>unambiguous in any <varname>datestyle</varname> input mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>1/8/1999</entry>
|
|
<entry>January 8 in <literal>MDY</literal> mode;
|
|
August 1 in <literal>DMY</literal> mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>1/18/1999</entry>
|
|
<entry>January 18 in <literal>MDY</literal> mode;
|
|
rejected in other modes</entry>
|
|
</row>
|
|
<row>
|
|
<entry>01/02/03</entry>
|
|
<entry>January 2, 2003 in <literal>MDY</literal> mode;
|
|
February 1, 2003 in <literal>DMY</literal> mode;
|
|
February 3, 2001 in <literal>YMD</literal> mode
|
|
</entry>
|
|
</row>
|
|
<row>
|
|
<entry>1999-Jan-08</entry>
|
|
<entry>January 8 in any mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>Jan-08-1999</entry>
|
|
<entry>January 8 in any mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>08-Jan-1999</entry>
|
|
<entry>January 8 in any mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>99-Jan-08</entry>
|
|
<entry>January 8 in <literal>YMD</literal> mode, else error</entry>
|
|
</row>
|
|
<row>
|
|
<entry>08-Jan-99</entry>
|
|
<entry>January 8, except error in <literal>YMD</literal> mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>Jan-08-99</entry>
|
|
<entry>January 8, except error in <literal>YMD</literal> mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>19990108</entry>
|
|
<entry>ISO 8601; January 8, 1999 in any mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>990108</entry>
|
|
<entry>ISO 8601; January 8, 1999 in any mode</entry>
|
|
</row>
|
|
<row>
|
|
<entry>1999.008</entry>
|
|
<entry>year and day of year</entry>
|
|
</row>
|
|
<row>
|
|
<entry>J2451187</entry>
|
|
<entry>Julian date</entry>
|
|
</row>
|
|
<row>
|
|
<entry>January 8, 99 BC</entry>
|
|
<entry>year 99 BC</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
</sect3>
|
|
|
|
<sect3 id="datatype-datetime-input-times">
|
|
<title>Times</title>
|
|
|
|
<indexterm>
|
|
<primary>time</primary>
|
|
</indexterm>
|
|
<indexterm>
|
|
<primary>time without time zone</primary>
|
|
</indexterm>
|
|
<indexterm>
|
|
<primary>time with time zone</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The time-of-day types are <type>time [
|
|
(<replaceable>p</replaceable>) ] without time zone</type> and
|
|
<type>time [ (<replaceable>p</replaceable>) ] with time
|
|
zone</type>. <type>time</type> alone is equivalent to
|
|
<type>time without time zone</type>.
|
|
</para>
|
|
|
|
<para>
|
|
Valid input for these types consists of a time of day followed
|
|
by an optional time zone. (See <xref
|
|
linkend="datatype-datetime-time-table"/>
|
|
and <xref linkend="datatype-timezone-table"/>.) If a time zone is
|
|
specified in the input for <type>time without time zone</type>,
|
|
it is silently ignored. You can also specify a date but it will
|
|
be ignored, except when you use a time zone name that involves a
|
|
daylight-savings rule, such as
|
|
<literal>America/New_York</literal>. In this case specifying the date
|
|
is required in order to determine whether standard or daylight-savings
|
|
time applies. The appropriate time zone offset is recorded in the
|
|
<type>time with time zone</type> value and is output as stored;
|
|
it is not adjusted to the active time zone.
|
|
</para>
|
|
|
|
<table id="datatype-datetime-time-table">
|
|
<title>Time Input</title>
|
|
<tgroup cols="2">
|
|
<colspec colname="col1" colwidth="3*"/>
|
|
<colspec colname="col2" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Example</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>04:05:06.789</literal></entry>
|
|
<entry>ISO 8601</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05:06</literal></entry>
|
|
<entry>ISO 8601</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05</literal></entry>
|
|
<entry>ISO 8601</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>040506</literal></entry>
|
|
<entry>ISO 8601</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05 AM</literal></entry>
|
|
<entry>same as 04:05; AM does not affect value</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05 PM</literal></entry>
|
|
<entry>same as 16:05; input hour must be <= 12</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05:06.789-8</literal></entry>
|
|
<entry>ISO 8601, with time zone as UTC offset</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05:06-08:00</literal></entry>
|
|
<entry>ISO 8601, with time zone as UTC offset</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05-08:00</literal></entry>
|
|
<entry>ISO 8601, with time zone as UTC offset</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>040506-08</literal></entry>
|
|
<entry>ISO 8601, with time zone as UTC offset</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>040506+0730</literal></entry>
|
|
<entry>ISO 8601, with fractional-hour time zone as UTC offset</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>040506+07:30:00</literal></entry>
|
|
<entry>UTC offset specified to seconds (not allowed in ISO 8601)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>04:05:06 PST</literal></entry>
|
|
<entry>time zone specified by abbreviation</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>2003-04-12 04:05:06 America/New_York</literal></entry>
|
|
<entry>time zone specified by full name</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<table tocentry="1" id="datatype-timezone-table">
|
|
<title>Time Zone Input</title>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>Example</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>PST</literal></entry>
|
|
<entry>Abbreviation (for Pacific Standard Time)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>America/New_York</literal></entry>
|
|
<entry>Full time zone name</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>PST8PDT</literal></entry>
|
|
<entry>POSIX-style time zone specification</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>-8:00:00</literal></entry>
|
|
<entry>UTC offset for PST</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>-8:00</literal></entry>
|
|
<entry>UTC offset for PST (ISO 8601 extended format)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>-800</literal></entry>
|
|
<entry>UTC offset for PST (ISO 8601 basic format)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>-8</literal></entry>
|
|
<entry>UTC offset for PST (ISO 8601 basic format)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>zulu</literal></entry>
|
|
<entry>Military abbreviation for UTC</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>z</literal></entry>
|
|
<entry>Short form of <literal>zulu</literal> (also in ISO 8601)</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
Refer to <xref linkend="datatype-timezones"/> for more information on how
|
|
to specify time zones.
|
|
</para>
|
|
</sect3>
|
|
|
|
<sect3 id="datatype-datetime-input-time-stamps">
|
|
<title>Time Stamps</title>
|
|
|
|
<indexterm>
|
|
<primary>timestamp</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>timestamp with time zone</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>timestamp without time zone</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Valid input for the time stamp types consists of the concatenation
|
|
of a date and a time, followed by an optional time zone,
|
|
followed by an optional <literal>AD</literal> or <literal>BC</literal>.
|
|
(Alternatively, <literal>AD</literal>/<literal>BC</literal> can appear
|
|
before the time zone, but this is not the preferred ordering.)
|
|
Thus:
|
|
|
|
<programlisting>
|
|
1999-01-08 04:05:06
|
|
</programlisting>
|
|
and:
|
|
<programlisting>
|
|
1999-01-08 04:05:06 -8:00
|
|
</programlisting>
|
|
|
|
are valid values, which follow the <acronym>ISO</acronym> 8601
|
|
standard. In addition, the common format:
|
|
<programlisting>
|
|
January 8 04:05:06 1999 PST
|
|
</programlisting>
|
|
is supported.
|
|
</para>
|
|
|
|
<para>
|
|
The <acronym>SQL</acronym> standard differentiates
|
|
<type>timestamp without time zone</type>
|
|
and <type>timestamp with time zone</type> literals by the presence of a
|
|
<quote>+</quote> or <quote>-</quote> symbol and time zone offset after
|
|
the time. Hence, according to the standard,
|
|
|
|
<programlisting>
|
|
TIMESTAMP '2004-10-19 10:23:54'
|
|
</programlisting>
|
|
|
|
is a <type>timestamp without time zone</type>, while
|
|
|
|
<programlisting>
|
|
TIMESTAMP '2004-10-19 10:23:54+02'
|
|
</programlisting>
|
|
|
|
is a <type>timestamp with time zone</type>.
|
|
<productname>PostgreSQL</productname> never examines the content of a
|
|
literal string before determining its type, and therefore will treat
|
|
both of the above as <type>timestamp without time zone</type>. To
|
|
ensure that a literal is treated as <type>timestamp with time
|
|
zone</type>, give it the correct explicit type:
|
|
|
|
<programlisting>
|
|
TIMESTAMP WITH TIME ZONE '2004-10-19 10:23:54+02'
|
|
</programlisting>
|
|
|
|
In a literal that has been determined to be <type>timestamp without time
|
|
zone</type>, <productname>PostgreSQL</productname> will silently ignore
|
|
any time zone indication.
|
|
That is, the resulting value is derived from the date/time
|
|
fields in the input value, and is not adjusted for time zone.
|
|
</para>
|
|
|
|
<para>
|
|
For <type>timestamp with time zone</type>, the internally stored
|
|
value is always in UTC (Universal
|
|
Coordinated Time, traditionally known as Greenwich Mean Time,
|
|
<acronym>GMT</acronym>). An input value that has an explicit
|
|
time zone specified is converted to UTC using the appropriate offset
|
|
for that time zone. If no time zone is stated in the input string,
|
|
then it is assumed to be in the time zone indicated by the system's
|
|
<xref linkend="guc-timezone"/> parameter, and is converted to UTC using the
|
|
offset for the <varname>timezone</varname> zone.
|
|
</para>
|
|
|
|
<para>
|
|
When a <type>timestamp with time
|
|
zone</type> value is output, it is always converted from UTC to the
|
|
current <varname>timezone</varname> zone, and displayed as local time in that
|
|
zone. To see the time in another time zone, either change
|
|
<varname>timezone</varname> or use the <literal>AT TIME ZONE</literal> construct
|
|
(see <xref linkend="functions-datetime-zoneconvert"/>).
|
|
</para>
|
|
|
|
<para>
|
|
Conversions between <type>timestamp without time zone</type> and
|
|
<type>timestamp with time zone</type> normally assume that the
|
|
<type>timestamp without time zone</type> value should be taken or given
|
|
as <varname>timezone</varname> local time. A different time zone can
|
|
be specified for the conversion using <literal>AT TIME ZONE</literal>.
|
|
</para>
|
|
</sect3>
|
|
|
|
<sect3 id="datatype-datetime-special-values">
|
|
<title>Special Values</title>
|
|
|
|
<indexterm>
|
|
<primary>time</primary>
|
|
<secondary>constants</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>date</primary>
|
|
<secondary>constants</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> supports several
|
|
special date/time input values for convenience, as shown in <xref
|
|
linkend="datatype-datetime-special-table"/>. The values
|
|
<literal>infinity</literal> and <literal>-infinity</literal>
|
|
are specially represented inside the system and will be displayed
|
|
unchanged; but the others are simply notational shorthands
|
|
that will be converted to ordinary date/time values when read.
|
|
(In particular, <literal>now</literal> and related strings are converted
|
|
to a specific time value as soon as they are read.)
|
|
All of these values need to be enclosed in single quotes when used
|
|
as constants in SQL commands.
|
|
</para>
|
|
|
|
<table id="datatype-datetime-special-table">
|
|
<title>Special Date/Time Inputs</title>
|
|
<tgroup cols="3">
|
|
<thead>
|
|
<row>
|
|
<entry>Input String</entry>
|
|
<entry>Valid Types</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>epoch</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type></entry>
|
|
<entry>1970-01-01 00:00:00+00 (Unix system time zero)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>infinity</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type>, <type>interval</type></entry>
|
|
<entry>later than all other time stamps</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>-infinity</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type>, <type>interval</type></entry>
|
|
<entry>earlier than all other time stamps</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>now</literal></entry>
|
|
<entry><type>date</type>, <type>time</type>, <type>timestamp</type></entry>
|
|
<entry>current transaction's start time</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>today</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type></entry>
|
|
<entry>midnight (<literal>00:00</literal>) today</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>tomorrow</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type></entry>
|
|
<entry>midnight (<literal>00:00</literal>) tomorrow</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>yesterday</literal></entry>
|
|
<entry><type>date</type>, <type>timestamp</type></entry>
|
|
<entry>midnight (<literal>00:00</literal>) yesterday</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>allballs</literal></entry>
|
|
<entry><type>time</type></entry>
|
|
<entry>00:00:00.00 UTC</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
The following <acronym>SQL</acronym>-compatible functions can also
|
|
be used to obtain the current time value for the corresponding data
|
|
type:
|
|
<literal>CURRENT_DATE</literal>, <literal>CURRENT_TIME</literal>,
|
|
<literal>CURRENT_TIMESTAMP</literal>, <literal>LOCALTIME</literal>,
|
|
<literal>LOCALTIMESTAMP</literal>. (See <xref
|
|
linkend="functions-datetime-current"/>.) Note that these are
|
|
SQL functions and are <emphasis>not</emphasis> recognized in data input strings.
|
|
</para>
|
|
|
|
<caution>
|
|
<para>
|
|
While the input strings <literal>now</literal>,
|
|
<literal>today</literal>, <literal>tomorrow</literal>,
|
|
and <literal>yesterday</literal> are fine to use in interactive SQL
|
|
commands, they can have surprising behavior when the command is
|
|
saved to be executed later, for example in prepared statements,
|
|
views, and function definitions. The string can be converted to a
|
|
specific time value that continues to be used long after it becomes
|
|
stale. Use one of the SQL functions instead in such contexts.
|
|
For example, <literal>CURRENT_DATE + 1</literal> is safer than
|
|
<literal>'tomorrow'::date</literal>.
|
|
</para>
|
|
</caution>
|
|
|
|
</sect3>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-datetime-output">
|
|
<title>Date/Time Output</title>
|
|
|
|
<indexterm>
|
|
<primary>date</primary>
|
|
<secondary>output format</secondary>
|
|
<seealso>formatting</seealso>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>time</primary>
|
|
<secondary>output format</secondary>
|
|
<seealso>formatting</seealso>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The output format of the date/time types can be set to one of the four
|
|
styles ISO 8601,
|
|
<acronym>SQL</acronym> (Ingres), traditional <productname>POSTGRES</productname>
|
|
(Unix <application>date</application> format), or
|
|
German. The default
|
|
is the <acronym>ISO</acronym> format. (The
|
|
<acronym>SQL</acronym> standard requires the use of the ISO 8601
|
|
format. The name of the <quote>SQL</quote> output format is a
|
|
historical accident.) <xref
|
|
linkend="datatype-datetime-output-table"/> shows examples of each
|
|
output style. The output of the <type>date</type> and
|
|
<type>time</type> types is generally only the date or time part
|
|
in accordance with the given examples. However, the
|
|
<productname>POSTGRES</productname> style outputs date-only values in
|
|
<acronym>ISO</acronym> format.
|
|
</para>
|
|
|
|
<table id="datatype-datetime-output-table">
|
|
<title>Date/Time Output Styles</title>
|
|
<tgroup cols="3">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Style Specification</entry>
|
|
<entry>Description</entry>
|
|
<entry>Example</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>ISO</literal></entry>
|
|
<entry>ISO 8601, SQL standard</entry>
|
|
<entry><literal>1997-12-17 07:37:16-08</literal></entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>SQL</literal></entry>
|
|
<entry>traditional style</entry>
|
|
<entry><literal>12/17/1997 07:37:16.00 PST</literal></entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>Postgres</literal></entry>
|
|
<entry>original style</entry>
|
|
<entry><literal>Wed Dec 17 07:37:16 1997 PST</literal></entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>German</literal></entry>
|
|
<entry>regional style</entry>
|
|
<entry><literal>17.12.1997 07:37:16.00 PST</literal></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<note>
|
|
<para>
|
|
ISO 8601 specifies the use of uppercase letter <literal>T</literal> to separate
|
|
the date and time. <productname>PostgreSQL</productname> accepts that format on
|
|
input, but on output it uses a space rather than <literal>T</literal>, as shown
|
|
above. This is for readability and for consistency with
|
|
<ulink url="https://datatracker.ietf.org/doc/html/rfc3339">RFC 3339</ulink> as
|
|
well as some other database systems.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
In the <acronym>SQL</acronym> and POSTGRES styles, day appears before
|
|
month if DMY field ordering has been specified, otherwise month appears
|
|
before day.
|
|
(See <xref linkend="datatype-datetime-input"/>
|
|
for how this setting also affects interpretation of input values.)
|
|
<xref linkend="datatype-datetime-output2-table"/> shows examples.
|
|
</para>
|
|
|
|
<table id="datatype-datetime-output2-table">
|
|
<title>Date Order Conventions</title>
|
|
<tgroup cols="3">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry><varname>datestyle</varname> Setting</entry>
|
|
<entry>Input Ordering</entry>
|
|
<entry>Example Output</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>SQL, DMY</literal></entry>
|
|
<entry><replaceable>day</replaceable>/<replaceable>month</replaceable>/<replaceable>year</replaceable></entry>
|
|
<entry><literal>17/12/1997 15:37:16.00 CET</literal></entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>SQL, MDY</literal></entry>
|
|
<entry><replaceable>month</replaceable>/<replaceable>day</replaceable>/<replaceable>year</replaceable></entry>
|
|
<entry><literal>12/17/1997 07:37:16.00 PST</literal></entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>Postgres, DMY</literal></entry>
|
|
<entry><replaceable>day</replaceable>/<replaceable>month</replaceable>/<replaceable>year</replaceable></entry>
|
|
<entry><literal>Wed 17 Dec 07:37:16 1997 PST</literal></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
In the <acronym>ISO</acronym> style, the time zone is always shown as
|
|
a signed numeric offset from UTC, with positive sign used for zones
|
|
east of Greenwich. The offset will be shown
|
|
as <replaceable>hh</replaceable> (hours only) if it is an integral
|
|
number of hours, else
|
|
as <replaceable>hh</replaceable>:<replaceable>mm</replaceable> if it
|
|
is an integral number of minutes, else as
|
|
<replaceable>hh</replaceable>:<replaceable>mm</replaceable>:<replaceable>ss</replaceable>.
|
|
(The third case is not possible with any modern time zone standard,
|
|
but it can appear when working with timestamps that predate the
|
|
adoption of standardized time zones.)
|
|
In the other date styles, the time zone is shown as an alphabetic
|
|
abbreviation if one is in common use in the current zone. Otherwise
|
|
it appears as a signed numeric offset in ISO 8601 basic format
|
|
(<replaceable>hh</replaceable> or <replaceable>hhmm</replaceable>).
|
|
</para>
|
|
|
|
<para>
|
|
The date/time style can be selected by the user using the
|
|
<command>SET datestyle</command> command, the <xref
|
|
linkend="guc-datestyle"/> parameter in the
|
|
<filename>postgresql.conf</filename> configuration file, or the
|
|
<envar>PGDATESTYLE</envar> environment variable on the server or
|
|
client.
|
|
</para>
|
|
|
|
<para>
|
|
The formatting function <function>to_char</function>
|
|
(see <xref linkend="functions-formatting"/>) is also available as
|
|
a more flexible way to format date/time output.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-timezones">
|
|
<title>Time Zones</title>
|
|
|
|
<indexterm zone="datatype-timezones">
|
|
<primary>time zone</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Time zones, and time-zone conventions, are influenced by
|
|
political decisions, not just earth geometry. Time zones around the
|
|
world became somewhat standardized during the 1900s,
|
|
but continue to be prone to arbitrary changes, particularly with
|
|
respect to daylight-savings rules.
|
|
<productname>PostgreSQL</productname> uses the widely-used
|
|
IANA (Olson) time zone database for information about
|
|
historical time zone rules. For times in the future, the assumption
|
|
is that the latest known rules for a given time zone will
|
|
continue to be observed indefinitely far into the future.
|
|
</para>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> endeavors to be compatible with
|
|
the <acronym>SQL</acronym> standard definitions for typical usage.
|
|
However, the <acronym>SQL</acronym> standard has an odd mix of date and
|
|
time types and capabilities. Two obvious problems are:
|
|
|
|
<itemizedlist>
|
|
<listitem>
|
|
<para>
|
|
Although the <type>date</type> type
|
|
cannot have an associated time zone, the
|
|
<type>time</type> type can.
|
|
Time zones in the real world have little meaning unless
|
|
associated with a date as well as a time,
|
|
since the offset can vary through the year with daylight-saving
|
|
time boundaries.
|
|
</para>
|
|
</listitem>
|
|
|
|
<listitem>
|
|
<para>
|
|
The default time zone is specified as a constant numeric offset
|
|
from <acronym>UTC</acronym>. It is therefore impossible to adapt to
|
|
daylight-saving time when doing date/time arithmetic across
|
|
<acronym>DST</acronym> boundaries.
|
|
</para>
|
|
</listitem>
|
|
|
|
</itemizedlist>
|
|
</para>
|
|
|
|
<para>
|
|
To address these difficulties, we recommend using date/time types
|
|
that contain both date and time when using time zones. We
|
|
do <emphasis>not</emphasis> recommend using the type <type>time with
|
|
time zone</type> (though it is supported by
|
|
<productname>PostgreSQL</productname> for legacy applications and
|
|
for compliance with the <acronym>SQL</acronym> standard).
|
|
<productname>PostgreSQL</productname> assumes
|
|
your local time zone for any type containing only date or time.
|
|
</para>
|
|
|
|
<para>
|
|
All timezone-aware dates and times are stored internally in
|
|
<acronym>UTC</acronym>. They are converted to local time
|
|
in the zone specified by the <xref linkend="guc-timezone"/> configuration
|
|
parameter before being displayed to the client.
|
|
</para>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> allows you to specify time zones in
|
|
three different forms:
|
|
<itemizedlist>
|
|
<listitem>
|
|
<para>
|
|
A full time zone name, for example <literal>America/New_York</literal>.
|
|
The recognized time zone names are listed in the
|
|
<literal>pg_timezone_names</literal> view (see <xref
|
|
linkend="view-pg-timezone-names"/>).
|
|
<productname>PostgreSQL</productname> uses the widely-used IANA
|
|
time zone data for this purpose, so the same time zone
|
|
names are also recognized by other software.
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
A time zone abbreviation, for example <literal>PST</literal>. Such a
|
|
specification merely defines a particular offset from UTC, in
|
|
contrast to full time zone names which can imply a set of daylight
|
|
savings transition rules as well. The recognized abbreviations
|
|
are listed in the <literal>pg_timezone_abbrevs</literal> view (see <xref
|
|
linkend="view-pg-timezone-abbrevs"/>). You cannot set the
|
|
configuration parameters <xref linkend="guc-timezone"/> or
|
|
<xref linkend="guc-log-timezone"/> to a time
|
|
zone abbreviation, but you can use abbreviations in
|
|
date/time input values and with the <literal>AT TIME ZONE</literal>
|
|
operator.
|
|
</para>
|
|
</listitem>
|
|
<listitem>
|
|
<para>
|
|
In addition to the timezone names and abbreviations,
|
|
<productname>PostgreSQL</productname> will accept POSIX-style time zone
|
|
specifications, as described in
|
|
<xref linkend="datetime-posix-timezone-specs"/>. This option is not
|
|
normally preferable to using a named time zone, but it may be
|
|
necessary if no suitable IANA time zone entry is available.
|
|
</para>
|
|
</listitem>
|
|
</itemizedlist>
|
|
|
|
In short, this is the difference between abbreviations
|
|
and full names: abbreviations represent a specific offset from UTC,
|
|
whereas many of the full names imply a local daylight-savings time
|
|
rule, and so have two possible UTC offsets. As an example,
|
|
<literal>2014-06-04 12:00 America/New_York</literal> represents noon local
|
|
time in New York, which for this particular date was Eastern Daylight
|
|
Time (UTC-4). So <literal>2014-06-04 12:00 EDT</literal> specifies that
|
|
same time instant. But <literal>2014-06-04 12:00 EST</literal> specifies
|
|
noon Eastern Standard Time (UTC-5), regardless of whether daylight
|
|
savings was nominally in effect on that date.
|
|
</para>
|
|
|
|
<para>
|
|
To complicate matters, some jurisdictions have used the same timezone
|
|
abbreviation to mean different UTC offsets at different times; for
|
|
example, in Moscow <literal>MSK</literal> has meant UTC+3 in some years and
|
|
UTC+4 in others. <productname>PostgreSQL</productname> interprets such
|
|
abbreviations according to whatever they meant (or had most recently
|
|
meant) on the specified date; but, as with the <literal>EST</literal> example
|
|
above, this is not necessarily the same as local civil time on that date.
|
|
</para>
|
|
|
|
<para>
|
|
In all cases, timezone names and abbreviations are recognized
|
|
case-insensitively. (This is a change from <productname>PostgreSQL</productname>
|
|
versions prior to 8.2, which were case-sensitive in some contexts but
|
|
not others.)
|
|
</para>
|
|
|
|
<para>
|
|
Neither timezone names nor abbreviations are hard-wired into the server;
|
|
they are obtained from configuration files stored under
|
|
<filename>.../share/timezone/</filename> and <filename>.../share/timezonesets/</filename>
|
|
of the installation directory
|
|
(see <xref linkend="datetime-config-files"/>).
|
|
</para>
|
|
|
|
<para>
|
|
The <xref linkend="guc-timezone"/> configuration parameter can
|
|
be set in the file <filename>postgresql.conf</filename>, or in any of the
|
|
other standard ways described in <xref linkend="runtime-config"/>.
|
|
There are also some special ways to set it:
|
|
|
|
<itemizedlist>
|
|
<listitem>
|
|
<para>
|
|
The <acronym>SQL</acronym> command <command>SET TIME ZONE</command>
|
|
sets the time zone for the session. This is an alternative spelling
|
|
of <command>SET TIMEZONE TO</command> with a more SQL-spec-compatible syntax.
|
|
</para>
|
|
</listitem>
|
|
|
|
<listitem>
|
|
<para>
|
|
The <envar>PGTZ</envar> environment variable is used by
|
|
<application>libpq</application> clients
|
|
to send a <command>SET TIME ZONE</command>
|
|
command to the server upon connection.
|
|
</para>
|
|
</listitem>
|
|
</itemizedlist>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-interval-input">
|
|
<title>Interval Input</title>
|
|
|
|
<indexterm>
|
|
<primary>interval</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<type>interval</type> values can be written using the following
|
|
verbose syntax:
|
|
|
|
<synopsis>
|
|
<optional>@</optional> <replaceable>quantity</replaceable> <replaceable>unit</replaceable> <optional><replaceable>quantity</replaceable> <replaceable>unit</replaceable>...</optional> <optional><replaceable>direction</replaceable></optional>
|
|
</synopsis>
|
|
|
|
where <replaceable>quantity</replaceable> is a number (possibly signed);
|
|
<replaceable>unit</replaceable> is <literal>microsecond</literal>,
|
|
<literal>millisecond</literal>, <literal>second</literal>,
|
|
<literal>minute</literal>, <literal>hour</literal>, <literal>day</literal>,
|
|
<literal>week</literal>, <literal>month</literal>, <literal>year</literal>,
|
|
<literal>decade</literal>, <literal>century</literal>, <literal>millennium</literal>,
|
|
or abbreviations or plurals of these units;
|
|
<replaceable>direction</replaceable> can be <literal>ago</literal> or
|
|
empty. The at sign (<literal>@</literal>) is optional noise. The amounts
|
|
of the different units are implicitly added with appropriate
|
|
sign accounting. <literal>ago</literal> negates all the fields.
|
|
This syntax is also used for interval output, if
|
|
<xref linkend="guc-intervalstyle"/> is set to
|
|
<literal>postgres_verbose</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
Quantities of days, hours, minutes, and seconds can be specified without
|
|
explicit unit markings. For example, <literal>'1 12:59:10'</literal> is read
|
|
the same as <literal>'1 day 12 hours 59 min 10 sec'</literal>. Also,
|
|
a combination of years and months can be specified with a dash;
|
|
for example <literal>'200-10'</literal> is read the same as <literal>'200 years
|
|
10 months'</literal>. (These shorter forms are in fact the only ones allowed
|
|
by the <acronym>SQL</acronym> standard, and are used for output when
|
|
<varname>IntervalStyle</varname> is set to <literal>sql_standard</literal>.)
|
|
</para>
|
|
|
|
<para>
|
|
Interval values can also be written as ISO 8601 time intervals, using
|
|
either the <quote>format with designators</quote> of the standard's section
|
|
4.4.3.2 or the <quote>alternative format</quote> of section 4.4.3.3. The
|
|
format with designators looks like this:
|
|
<synopsis>
|
|
P <replaceable>quantity</replaceable> <replaceable>unit</replaceable> <optional> <replaceable>quantity</replaceable> <replaceable>unit</replaceable> ...</optional> <optional> T <optional> <replaceable>quantity</replaceable> <replaceable>unit</replaceable> ...</optional></optional>
|
|
</synopsis>
|
|
The string must start with a <literal>P</literal>, and may include a
|
|
<literal>T</literal> that introduces the time-of-day units. The
|
|
available unit abbreviations are given in <xref
|
|
linkend="datatype-interval-iso8601-units"/>. Units may be
|
|
omitted, and may be specified in any order, but units smaller than
|
|
a day must appear after <literal>T</literal>. In particular, the meaning of
|
|
<literal>M</literal> depends on whether it is before or after
|
|
<literal>T</literal>.
|
|
</para>
|
|
|
|
<table id="datatype-interval-iso8601-units">
|
|
<title>ISO 8601 Interval Unit Abbreviations</title>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>Abbreviation</entry>
|
|
<entry>Meaning</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry>Y</entry>
|
|
<entry>Years</entry>
|
|
</row>
|
|
<row>
|
|
<entry>M</entry>
|
|
<entry>Months (in the date part)</entry>
|
|
</row>
|
|
<row>
|
|
<entry>W</entry>
|
|
<entry>Weeks</entry>
|
|
</row>
|
|
<row>
|
|
<entry>D</entry>
|
|
<entry>Days</entry>
|
|
</row>
|
|
<row>
|
|
<entry>H</entry>
|
|
<entry>Hours</entry>
|
|
</row>
|
|
<row>
|
|
<entry>M</entry>
|
|
<entry>Minutes (in the time part)</entry>
|
|
</row>
|
|
<row>
|
|
<entry>S</entry>
|
|
<entry>Seconds</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
In the alternative format:
|
|
<synopsis>
|
|
P <optional> <replaceable>years</replaceable>-<replaceable>months</replaceable>-<replaceable>days</replaceable> </optional> <optional> T <replaceable>hours</replaceable>:<replaceable>minutes</replaceable>:<replaceable>seconds</replaceable> </optional>
|
|
</synopsis>
|
|
the string must begin with <literal>P</literal>, and a
|
|
<literal>T</literal> separates the date and time parts of the interval.
|
|
The values are given as numbers similar to ISO 8601 dates.
|
|
</para>
|
|
|
|
<para>
|
|
When writing an interval constant with a <replaceable>fields</replaceable>
|
|
specification, or when assigning a string to an interval column that was
|
|
defined with a <replaceable>fields</replaceable> specification, the interpretation of
|
|
unmarked quantities depends on the <replaceable>fields</replaceable>. For
|
|
example <literal>INTERVAL '1' YEAR</literal> is read as 1 year, whereas
|
|
<literal>INTERVAL '1'</literal> means 1 second. Also, field values
|
|
<quote>to the right</quote> of the least significant field allowed by the
|
|
<replaceable>fields</replaceable> specification are silently discarded. For
|
|
example, writing <literal>INTERVAL '1 day 2:03:04' HOUR TO MINUTE</literal>
|
|
results in dropping the seconds field, but not the day field.
|
|
</para>
|
|
|
|
<para>
|
|
According to the <acronym>SQL</acronym> standard all fields of an interval
|
|
value must have the same sign, so a leading negative sign applies to all
|
|
fields; for example the negative sign in the interval literal
|
|
<literal>'-1 2:03:04'</literal> applies to both the days and hour/minute/second
|
|
parts. <productname>PostgreSQL</productname> allows the fields to have different
|
|
signs, and traditionally treats each field in the textual representation
|
|
as independently signed, so that the hour/minute/second part is
|
|
considered positive in this example. If <varname>IntervalStyle</varname> is
|
|
set to <literal>sql_standard</literal> then a leading sign is considered
|
|
to apply to all fields (but only if no additional signs appear).
|
|
Otherwise the traditional <productname>PostgreSQL</productname> interpretation is
|
|
used. To avoid ambiguity, it's recommended to attach an explicit sign
|
|
to each field if any field is negative.
|
|
</para>
|
|
|
|
<para>
|
|
Internally, <type>interval</type> values are stored as three integral
|
|
fields: months, days, and microseconds. These fields are kept
|
|
separate because the number of days in a month varies, while a day
|
|
can have 23 or 25 hours if a daylight savings time transition is
|
|
involved. An interval input string that uses other units is
|
|
normalized into this format, and then reconstructed in a standardized
|
|
way for output, for example:
|
|
|
|
<programlisting>
|
|
SELECT '2 years 15 months 100 weeks 99 hours 123456789 milliseconds'::interval;
|
|
interval
|
|
---------------------------------------
|
|
3 years 3 mons 700 days 133:17:36.789
|
|
</programlisting>
|
|
|
|
Here weeks, which are understood as <quote>7 days</quote>, have been
|
|
kept separate, while the smaller and larger time units were
|
|
combined and normalized.
|
|
</para>
|
|
|
|
<para>
|
|
Input field values can have fractional parts, for example <literal>'1.5
|
|
weeks'</literal> or <literal>'01:02:03.45'</literal>. However,
|
|
because <type>interval</type> internally stores only integral fields,
|
|
fractional values must be converted into smaller
|
|
units. Fractional parts of units greater than months are rounded to
|
|
be an integer number of months, e.g. <literal>'1.5 years'</literal>
|
|
becomes <literal>'1 year 6 mons'</literal>. Fractional parts of
|
|
weeks and days are computed to be an integer number of days and
|
|
microseconds, assuming 30 days per month and 24 hours per day, e.g.,
|
|
<literal>'1.75 months'</literal> becomes <literal>1 mon 22 days
|
|
12:00:00</literal>. Only seconds will ever be shown as fractional
|
|
on output.
|
|
</para>
|
|
|
|
<para>
|
|
<xref linkend="datatype-interval-input-examples"/> shows some examples
|
|
of valid <type>interval</type> input.
|
|
</para>
|
|
|
|
<table id="datatype-interval-input-examples">
|
|
<title>Interval Input</title>
|
|
<tgroup cols="2">
|
|
<thead>
|
|
<row>
|
|
<entry>Example</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>1-2</literal></entry>
|
|
<entry>SQL standard format: 1 year 2 months</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>3 4:05:06</literal></entry>
|
|
<entry>SQL standard format: 3 days 4 hours 5 minutes 6 seconds</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>1 year 2 months 3 days 4 hours 5 minutes 6 seconds</literal></entry>
|
|
<entry>Traditional Postgres format: 1 year 2 months 3 days 4 hours 5 minutes 6 seconds</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>P1Y2M3DT4H5M6S</literal></entry>
|
|
<entry>ISO 8601 <quote>format with designators</quote>: same meaning as above</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>P0001-02-03T04:05:06</literal></entry>
|
|
<entry>ISO 8601 <quote>alternative format</quote>: same meaning as above</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-interval-output">
|
|
<title>Interval Output</title>
|
|
|
|
<indexterm>
|
|
<primary>interval</primary>
|
|
<secondary>output format</secondary>
|
|
<seealso>formatting</seealso>
|
|
</indexterm>
|
|
|
|
<para>
|
|
As previously explained, <productname>PostgreSQL</productname>
|
|
stores <type>interval</type> values as months, days, and
|
|
microseconds. For output, the months field is converted to years and
|
|
months by dividing by 12. The days field is shown as-is. The
|
|
microseconds field is converted to hours, minutes, seconds, and
|
|
fractional seconds. Thus months, minutes, and seconds will never be
|
|
shown as exceeding the ranges 0–11, 0–59, and 0–59
|
|
respectively, while the displayed years, days, and hours fields can
|
|
be quite large. (The <link
|
|
linkend="function-justify-days"><function>justify_days</function></link>
|
|
and <link
|
|
linkend="function-justify-hours"><function>justify_hours</function></link>
|
|
functions can be used if it is desirable to transpose large days or
|
|
hours values into the next higher field.)
|
|
</para>
|
|
|
|
<para>
|
|
The output format of the interval type can be set to one of the
|
|
four styles <literal>sql_standard</literal>, <literal>postgres</literal>,
|
|
<literal>postgres_verbose</literal>, or <literal>iso_8601</literal>,
|
|
using the command <literal>SET intervalstyle</literal>.
|
|
The default is the <literal>postgres</literal> format.
|
|
<xref linkend="interval-style-output-table"/> shows examples of each
|
|
output style.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>sql_standard</literal> style produces output that conforms to
|
|
the SQL standard's specification for interval literal strings, if
|
|
the interval value meets the standard's restrictions (either year-month
|
|
only or day-time only, with no mixing of positive
|
|
and negative components). Otherwise the output looks like a standard
|
|
year-month literal string followed by a day-time literal string,
|
|
with explicit signs added to disambiguate mixed-sign intervals.
|
|
</para>
|
|
|
|
<para>
|
|
The output of the <literal>postgres</literal> style matches the output of
|
|
<productname>PostgreSQL</productname> releases prior to 8.4 when the
|
|
<xref linkend="guc-datestyle"/> parameter was set to <literal>ISO</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
The output of the <literal>postgres_verbose</literal> style matches the output of
|
|
<productname>PostgreSQL</productname> releases prior to 8.4 when the
|
|
<varname>DateStyle</varname> parameter was set to non-<literal>ISO</literal> output.
|
|
</para>
|
|
|
|
<para>
|
|
The output of the <literal>iso_8601</literal> style matches the <quote>format
|
|
with designators</quote> described in section 4.4.3.2 of the
|
|
ISO 8601 standard.
|
|
</para>
|
|
|
|
<table id="interval-style-output-table">
|
|
<title>Interval Output Style Examples</title>
|
|
<tgroup cols="4">
|
|
<thead>
|
|
<row>
|
|
<entry>Style Specification</entry>
|
|
<entry>Year-Month Interval</entry>
|
|
<entry>Day-Time Interval</entry>
|
|
<entry>Mixed Interval</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><literal>sql_standard</literal></entry>
|
|
<entry>1-2</entry>
|
|
<entry>3 4:05:06</entry>
|
|
<entry>-1-2 +3 -4:05:06</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>postgres</literal></entry>
|
|
<entry>1 year 2 mons</entry>
|
|
<entry>3 days 04:05:06</entry>
|
|
<entry>-1 year -2 mons +3 days -04:05:06</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>postgres_verbose</literal></entry>
|
|
<entry>@ 1 year 2 mons</entry>
|
|
<entry>@ 3 days 4 hours 5 mins 6 secs</entry>
|
|
<entry>@ 1 year 2 mons -3 days 4 hours 5 mins 6 secs ago</entry>
|
|
</row>
|
|
<row>
|
|
<entry><literal>iso_8601</literal></entry>
|
|
<entry>P1Y2M</entry>
|
|
<entry>P3DT4H5M6S</entry>
|
|
<entry>P-1Y-2M3D&zwsp;T-4H-5M-6S</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
</sect2>
|
|
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-boolean">
|
|
<title>Boolean Type</title>
|
|
|
|
<indexterm zone="datatype-boolean">
|
|
<primary>Boolean</primary>
|
|
<secondary>data type</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-boolean">
|
|
<primary>true</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-boolean">
|
|
<primary>false</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> provides the
|
|
standard <acronym>SQL</acronym> type <type>boolean</type>;
|
|
see <xref linkend="datatype-boolean-table"/>.
|
|
The <type>boolean</type> type can have several states:
|
|
<quote>true</quote>, <quote>false</quote>, and a third state,
|
|
<quote>unknown</quote>, which is represented by the
|
|
<acronym>SQL</acronym> null value.
|
|
</para>
|
|
|
|
<table id="datatype-boolean-table">
|
|
<title>Boolean Data Type</title>
|
|
<tgroup cols="3">
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>boolean</type></entry>
|
|
<entry>1 byte</entry>
|
|
<entry>state of true or false</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
Boolean constants can be represented in SQL queries by the SQL
|
|
key words <literal>TRUE</literal>, <literal>FALSE</literal>,
|
|
and <literal>NULL</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
The datatype input function for type <type>boolean</type> accepts these
|
|
string representations for the <quote>true</quote> state:
|
|
<simplelist>
|
|
<member><literal>true</literal></member>
|
|
<member><literal>yes</literal></member>
|
|
<member><literal>on</literal></member>
|
|
<member><literal>1</literal></member>
|
|
</simplelist>
|
|
and these representations for the <quote>false</quote> state:
|
|
<simplelist>
|
|
<member><literal>false</literal></member>
|
|
<member><literal>no</literal></member>
|
|
<member><literal>off</literal></member>
|
|
<member><literal>0</literal></member>
|
|
</simplelist>
|
|
Unique prefixes of these strings are also accepted, for
|
|
example <literal>t</literal> or <literal>n</literal>.
|
|
Leading or trailing whitespace is ignored, and case does not matter.
|
|
</para>
|
|
|
|
<para>
|
|
The datatype output function for type <type>boolean</type> always emits
|
|
either <literal>t</literal> or <literal>f</literal>, as shown in
|
|
<xref linkend="datatype-boolean-example"/>.
|
|
</para>
|
|
|
|
<example id="datatype-boolean-example">
|
|
<title>Using the <type>boolean</type> Type</title>
|
|
|
|
<programlisting>
|
|
CREATE TABLE test1 (a boolean, b text);
|
|
INSERT INTO test1 VALUES (TRUE, 'sic est');
|
|
INSERT INTO test1 VALUES (FALSE, 'non est');
|
|
SELECT * FROM test1;
|
|
a | b
|
|
---+---------
|
|
t | sic est
|
|
f | non est
|
|
|
|
SELECT * FROM test1 WHERE a;
|
|
a | b
|
|
---+---------
|
|
t | sic est
|
|
</programlisting>
|
|
</example>
|
|
|
|
<para>
|
|
The key words <literal>TRUE</literal> and <literal>FALSE</literal> are
|
|
the preferred (<acronym>SQL</acronym>-compliant) method for writing
|
|
Boolean constants in SQL queries. But you can also use the string
|
|
representations by following the generic string-literal constant syntax
|
|
described in <xref linkend="sql-syntax-constants-generic"/>, for
|
|
example <literal>'yes'::boolean</literal>.
|
|
</para>
|
|
|
|
<para>
|
|
Note that the parser automatically understands
|
|
that <literal>TRUE</literal> and <literal>FALSE</literal> are of
|
|
type <type>boolean</type>, but this is not so
|
|
for <literal>NULL</literal> because that can have any type.
|
|
So in some contexts you might have to cast <literal>NULL</literal>
|
|
to <type>boolean</type> explicitly, for
|
|
example <literal>NULL::boolean</literal>. Conversely, the cast can be
|
|
omitted from a string-literal Boolean value in contexts where the parser
|
|
can deduce that the literal must be of type <type>boolean</type>.
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-enum">
|
|
<title>Enumerated Types</title>
|
|
|
|
<indexterm zone="datatype-enum">
|
|
<primary>data type</primary>
|
|
<secondary>enumerated (enum)</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-enum">
|
|
<primary>enumerated types</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Enumerated (enum) types are data types that
|
|
comprise a static, ordered set of values.
|
|
They are equivalent to the <type>enum</type>
|
|
types supported in a number of programming languages. An example of an enum
|
|
type might be the days of the week, or a set of status values for
|
|
a piece of data.
|
|
</para>
|
|
|
|
<sect2 id="datatype-enum-declaration">
|
|
<title>Declaration of Enumerated Types</title>
|
|
|
|
<para>
|
|
Enum types are created using the <xref
|
|
linkend="sql-createtype"/> command,
|
|
for example:
|
|
|
|
<programlisting>
|
|
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
|
|
</programlisting>
|
|
|
|
Once created, the enum type can be used in table and function
|
|
definitions much like any other type:
|
|
<programlisting>
|
|
CREATE TYPE mood AS ENUM ('sad', 'ok', 'happy');
|
|
CREATE TABLE person (
|
|
name text,
|
|
current_mood mood
|
|
);
|
|
INSERT INTO person VALUES ('Moe', 'happy');
|
|
SELECT * FROM person WHERE current_mood = 'happy';
|
|
name | current_mood
|
|
------+--------------
|
|
Moe | happy
|
|
(1 row)
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-enum-ordering">
|
|
<title>Ordering</title>
|
|
|
|
<para>
|
|
The ordering of the values in an enum type is the
|
|
order in which the values were listed when the type was created.
|
|
All standard comparison operators and related
|
|
aggregate functions are supported for enums. For example:
|
|
|
|
<programlisting>
|
|
INSERT INTO person VALUES ('Larry', 'sad');
|
|
INSERT INTO person VALUES ('Curly', 'ok');
|
|
SELECT * FROM person WHERE current_mood > 'sad';
|
|
name | current_mood
|
|
-------+--------------
|
|
Moe | happy
|
|
Curly | ok
|
|
(2 rows)
|
|
|
|
SELECT * FROM person WHERE current_mood > 'sad' ORDER BY current_mood;
|
|
name | current_mood
|
|
-------+--------------
|
|
Curly | ok
|
|
Moe | happy
|
|
(2 rows)
|
|
|
|
SELECT name
|
|
FROM person
|
|
WHERE current_mood = (SELECT MIN(current_mood) FROM person);
|
|
name
|
|
-------
|
|
Larry
|
|
(1 row)
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-enum-type-safety">
|
|
<title>Type Safety</title>
|
|
|
|
<para>
|
|
Each enumerated data type is separate and cannot
|
|
be compared with other enumerated types. See this example:
|
|
|
|
<programlisting>
|
|
CREATE TYPE happiness AS ENUM ('happy', 'very happy', 'ecstatic');
|
|
CREATE TABLE holidays (
|
|
num_weeks integer,
|
|
happiness happiness
|
|
);
|
|
INSERT INTO holidays(num_weeks,happiness) VALUES (4, 'happy');
|
|
INSERT INTO holidays(num_weeks,happiness) VALUES (6, 'very happy');
|
|
INSERT INTO holidays(num_weeks,happiness) VALUES (8, 'ecstatic');
|
|
INSERT INTO holidays(num_weeks,happiness) VALUES (2, 'sad');
|
|
ERROR: invalid input value for enum happiness: "sad"
|
|
SELECT person.name, holidays.num_weeks FROM person, holidays
|
|
WHERE person.current_mood = holidays.happiness;
|
|
ERROR: operator does not exist: mood = happiness
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
If you really need to do something like that, you can either
|
|
write a custom operator or add explicit casts to your query:
|
|
|
|
<programlisting>
|
|
SELECT person.name, holidays.num_weeks FROM person, holidays
|
|
WHERE person.current_mood::text = holidays.happiness::text;
|
|
name | num_weeks
|
|
------+-----------
|
|
Moe | 4
|
|
(1 row)
|
|
|
|
</programlisting>
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-enum-implementation-details">
|
|
<title>Implementation Details</title>
|
|
|
|
<para>
|
|
Enum labels are case sensitive, so
|
|
<type>'happy'</type> is not the same as <type>'HAPPY'</type>.
|
|
White space in the labels is significant too.
|
|
</para>
|
|
|
|
<para>
|
|
Although enum types are primarily intended for static sets of values,
|
|
there is support for adding new values to an existing enum type, and for
|
|
renaming values (see <xref linkend="sql-altertype"/>). Existing values
|
|
cannot be removed from an enum type, nor can the sort ordering of such
|
|
values be changed, short of dropping and re-creating the enum type.
|
|
</para>
|
|
|
|
<para>
|
|
An enum value occupies four bytes on disk. The length of an enum
|
|
value's textual label is limited by the <symbol>NAMEDATALEN</symbol>
|
|
setting compiled into <productname>PostgreSQL</productname>; in standard
|
|
builds this means at most 63 bytes.
|
|
</para>
|
|
|
|
<para>
|
|
The translations from internal enum values to textual labels are
|
|
kept in the system catalog
|
|
<link linkend="catalog-pg-enum"><structname>pg_enum</structname></link>.
|
|
Querying this catalog directly can be useful.
|
|
</para>
|
|
|
|
</sect2>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-geometric">
|
|
<title>Geometric Types</title>
|
|
|
|
<para>
|
|
Geometric data types represent two-dimensional spatial
|
|
objects. <xref linkend="datatype-geo-table"/> shows the geometric
|
|
types available in <productname>PostgreSQL</productname>.
|
|
</para>
|
|
|
|
<table id="datatype-geo-table">
|
|
<title>Geometric Types</title>
|
|
<tgroup cols="4">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<colspec colname="col4" colwidth="1*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
<entry>Representation</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry><type>point</type></entry>
|
|
<entry>16 bytes</entry>
|
|
<entry>Point on a plane</entry>
|
|
<entry>(x,y)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>line</type></entry>
|
|
<entry>32 bytes</entry>
|
|
<entry>Infinite line</entry>
|
|
<entry>{A,B,C}</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>lseg</type></entry>
|
|
<entry>32 bytes</entry>
|
|
<entry>Finite line segment</entry>
|
|
<entry>[(x1,y1),(x2,y2)]</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>box</type></entry>
|
|
<entry>32 bytes</entry>
|
|
<entry>Rectangular box</entry>
|
|
<entry>(x1,y1),(x2,y2)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>path</type></entry>
|
|
<entry>16+16n bytes</entry>
|
|
<entry>Closed path (similar to polygon)</entry>
|
|
<entry>((x1,y1),...)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>path</type></entry>
|
|
<entry>16+16n bytes</entry>
|
|
<entry>Open path</entry>
|
|
<entry>[(x1,y1),...]</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>polygon</type></entry>
|
|
<entry>40+16n bytes</entry>
|
|
<entry>Polygon (similar to closed path)</entry>
|
|
<entry>((x1,y1),...)</entry>
|
|
</row>
|
|
<row>
|
|
<entry><type>circle</type></entry>
|
|
<entry>24 bytes</entry>
|
|
<entry>Circle</entry>
|
|
<entry><(x,y),r> (center point and radius)</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
A rich set of functions and operators is available to perform various geometric
|
|
operations such as scaling, translation, rotation, and determining
|
|
intersections. They are explained in <xref linkend="functions-geometry"/>.
|
|
</para>
|
|
|
|
<sect2 id="datatype-geometric-points">
|
|
<title>Points</title>
|
|
|
|
<indexterm>
|
|
<primary>point</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Points are the fundamental two-dimensional building block for geometric
|
|
types. Values of type <type>point</type> are specified using either of
|
|
the following syntaxes:
|
|
|
|
<synopsis>
|
|
( <replaceable>x</replaceable> , <replaceable>y</replaceable> )
|
|
<replaceable>x</replaceable> , <replaceable>y</replaceable>
|
|
</synopsis>
|
|
|
|
where <replaceable>x</replaceable> and <replaceable>y</replaceable> are the respective
|
|
coordinates, as floating-point numbers.
|
|
</para>
|
|
|
|
<para>
|
|
Points are output using the first syntax.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-line">
|
|
<title>Lines</title>
|
|
|
|
<indexterm>
|
|
<primary>line</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Lines are represented by the linear
|
|
equation <replaceable>A</replaceable>x + <replaceable>B</replaceable>y + <replaceable>C</replaceable> = 0,
|
|
where <replaceable>A</replaceable> and <replaceable>B</replaceable> are not both zero. Values
|
|
of type <type>line</type> are input and output in the following form:
|
|
<synopsis>
|
|
{ <replaceable>A</replaceable>, <replaceable>B</replaceable>, <replaceable>C</replaceable> }
|
|
</synopsis>
|
|
|
|
Alternatively, any of the following forms can be used for input:
|
|
|
|
<synopsis>
|
|
[ ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) ]
|
|
( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> )
|
|
<replaceable>x1</replaceable> , <replaceable>y1</replaceable> , <replaceable>x2</replaceable> , <replaceable>y2</replaceable>
|
|
</synopsis>
|
|
|
|
where
|
|
<literal>(<replaceable>x1</replaceable>,<replaceable>y1</replaceable>)</literal>
|
|
and
|
|
<literal>(<replaceable>x2</replaceable>,<replaceable>y2</replaceable>)</literal>
|
|
are two different points on the line.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-lseg">
|
|
<title>Line Segments</title>
|
|
|
|
<indexterm>
|
|
<primary>lseg</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>line segment</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Line segments are represented by pairs of points that are the endpoints
|
|
of the segment. Values of type <type>lseg</type> are specified using any
|
|
of the following syntaxes:
|
|
|
|
<synopsis>
|
|
[ ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) ]
|
|
( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> )
|
|
<replaceable>x1</replaceable> , <replaceable>y1</replaceable> , <replaceable>x2</replaceable> , <replaceable>y2</replaceable>
|
|
</synopsis>
|
|
|
|
where
|
|
<literal>(<replaceable>x1</replaceable>,<replaceable>y1</replaceable>)</literal>
|
|
and
|
|
<literal>(<replaceable>x2</replaceable>,<replaceable>y2</replaceable>)</literal>
|
|
are the end points of the line segment.
|
|
</para>
|
|
|
|
<para>
|
|
Line segments are output using the first syntax.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-geometric-boxes">
|
|
<title>Boxes</title>
|
|
|
|
<indexterm>
|
|
<primary>box (data type)</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>rectangle</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Boxes are represented by pairs of points that are opposite
|
|
corners of the box.
|
|
Values of type <type>box</type> are specified using any of the following
|
|
syntaxes:
|
|
|
|
<synopsis>
|
|
( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> ) )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ( <replaceable>x2</replaceable> , <replaceable>y2</replaceable> )
|
|
<replaceable>x1</replaceable> , <replaceable>y1</replaceable> , <replaceable>x2</replaceable> , <replaceable>y2</replaceable>
|
|
</synopsis>
|
|
|
|
where
|
|
<literal>(<replaceable>x1</replaceable>,<replaceable>y1</replaceable>)</literal>
|
|
and
|
|
<literal>(<replaceable>x2</replaceable>,<replaceable>y2</replaceable>)</literal>
|
|
are any two opposite corners of the box.
|
|
</para>
|
|
|
|
<para>
|
|
Boxes are output using the second syntax.
|
|
</para>
|
|
|
|
<para>
|
|
Any two opposite corners can be supplied on input, but the values
|
|
will be reordered as needed to store the
|
|
upper right and lower left corners, in that order.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-geometric-paths">
|
|
<title>Paths</title>
|
|
|
|
<indexterm>
|
|
<primary>path (data type)</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Paths are represented by lists of connected points. Paths can be
|
|
<firstterm>open</firstterm>, where
|
|
the first and last points in the list are considered not connected, or
|
|
<firstterm>closed</firstterm>,
|
|
where the first and last points are considered connected.
|
|
</para>
|
|
|
|
<para>
|
|
Values of type <type>path</type> are specified using any of the following
|
|
syntaxes:
|
|
|
|
<synopsis>
|
|
[ ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) ]
|
|
( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> , ... , <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
|
|
<replaceable>x1</replaceable> , <replaceable>y1</replaceable> , ... , <replaceable>xn</replaceable> , <replaceable>yn</replaceable>
|
|
</synopsis>
|
|
|
|
where the points are the end points of the line segments
|
|
comprising the path. Square brackets (<literal>[]</literal>) indicate
|
|
an open path, while parentheses (<literal>()</literal>) indicate a
|
|
closed path. When the outermost parentheses are omitted, as
|
|
in the third through fifth syntaxes, a closed path is assumed.
|
|
</para>
|
|
|
|
<para>
|
|
Paths are output using the first or second syntax, as appropriate.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-polygon">
|
|
<title>Polygons</title>
|
|
|
|
<indexterm>
|
|
<primary>polygon</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Polygons are represented by lists of points (the vertices of the
|
|
polygon). Polygons are very similar to closed paths; the essential
|
|
difference is that a polygon is considered to include the area
|
|
within it, while a path is not.
|
|
</para>
|
|
|
|
<para>
|
|
Values of type <type>polygon</type> are specified using any of the
|
|
following syntaxes:
|
|
|
|
<synopsis>
|
|
( ( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> ) )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> ) , ... , ( <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
|
|
( <replaceable>x1</replaceable> , <replaceable>y1</replaceable> , ... , <replaceable>xn</replaceable> , <replaceable>yn</replaceable> )
|
|
<replaceable>x1</replaceable> , <replaceable>y1</replaceable> , ... , <replaceable>xn</replaceable> , <replaceable>yn</replaceable>
|
|
</synopsis>
|
|
|
|
where the points are the end points of the line segments
|
|
comprising the boundary of the polygon.
|
|
</para>
|
|
|
|
<para>
|
|
Polygons are output using the first syntax.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-circle">
|
|
<title>Circles</title>
|
|
|
|
<indexterm>
|
|
<primary>circle</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Circles are represented by a center point and radius.
|
|
Values of type <type>circle</type> are specified using any of the
|
|
following syntaxes:
|
|
|
|
<synopsis>
|
|
< ( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable> >
|
|
( ( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable> )
|
|
( <replaceable>x</replaceable> , <replaceable>y</replaceable> ) , <replaceable>r</replaceable>
|
|
<replaceable>x</replaceable> , <replaceable>y</replaceable> , <replaceable>r</replaceable>
|
|
</synopsis>
|
|
|
|
where
|
|
<literal>(<replaceable>x</replaceable>,<replaceable>y</replaceable>)</literal>
|
|
is the center point and <replaceable>r</replaceable> is the radius of the
|
|
circle.
|
|
</para>
|
|
|
|
<para>
|
|
Circles are output using the first syntax.
|
|
</para>
|
|
</sect2>
|
|
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-net-types">
|
|
<title>Network Address Types</title>
|
|
|
|
<indexterm zone="datatype-net-types">
|
|
<primary>network</primary>
|
|
<secondary>data types</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> offers data types to store IPv4, IPv6, and MAC
|
|
addresses, as shown in <xref linkend="datatype-net-types-table"/>. It
|
|
is better to use these types instead of plain text types to store
|
|
network addresses, because
|
|
these types offer input error checking and specialized
|
|
operators and functions (see <xref linkend="functions-net"/>).
|
|
</para>
|
|
|
|
<table tocentry="1" id="datatype-net-types-table">
|
|
<title>Network Address Types</title>
|
|
<tgroup cols="3">
|
|
<colspec colname="col1" colwidth="1*"/>
|
|
<colspec colname="col2" colwidth="1*"/>
|
|
<colspec colname="col3" colwidth="2*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Storage Size</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
|
|
<row>
|
|
<entry><type>cidr</type></entry>
|
|
<entry>7 or 19 bytes</entry>
|
|
<entry>IPv4 and IPv6 networks</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>inet</type></entry>
|
|
<entry>7 or 19 bytes</entry>
|
|
<entry>IPv4 and IPv6 hosts and networks</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>macaddr</type></entry>
|
|
<entry>6 bytes</entry>
|
|
<entry>MAC addresses</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>macaddr8</type></entry>
|
|
<entry>8 bytes</entry>
|
|
<entry>MAC addresses (EUI-64 format)</entry>
|
|
</row>
|
|
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
When sorting <type>inet</type> or <type>cidr</type> data types,
|
|
IPv4 addresses will always sort before IPv6 addresses, including
|
|
IPv4 addresses encapsulated or mapped to IPv6 addresses, such as
|
|
::10.2.3.4 or ::ffff:10.4.3.2.
|
|
</para>
|
|
|
|
|
|
<sect2 id="datatype-inet">
|
|
<title><type>inet</type></title>
|
|
|
|
<indexterm>
|
|
<primary>inet (data type)</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>inet</type> type holds an IPv4 or IPv6 host address, and
|
|
optionally its subnet, all in one field.
|
|
The subnet is represented by the number of network address bits
|
|
present in the host address (the
|
|
<quote>netmask</quote>). If the netmask is 32 and the address is IPv4,
|
|
then the value does not indicate a subnet, only a single host.
|
|
In IPv6, the address length is 128 bits, so 128 bits specify a
|
|
unique host address. Note that if you
|
|
want to accept only networks, you should use the
|
|
<type>cidr</type> type rather than <type>inet</type>.
|
|
</para>
|
|
|
|
<para>
|
|
The input format for this type is
|
|
<replaceable class="parameter">address/y</replaceable>
|
|
where
|
|
<replaceable class="parameter">address</replaceable>
|
|
is an IPv4 or IPv6 address and
|
|
<replaceable class="parameter">y</replaceable>
|
|
is the number of bits in the netmask. If the
|
|
<replaceable class="parameter">/y</replaceable>
|
|
portion is omitted, the
|
|
netmask is taken to be 32 for IPv4 or 128 for IPv6,
|
|
so the value represents
|
|
just a single host. On display, the
|
|
<replaceable class="parameter">/y</replaceable>
|
|
portion is suppressed if the netmask specifies a single host.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-cidr">
|
|
<title><type>cidr</type></title>
|
|
|
|
<indexterm>
|
|
<primary>cidr</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>cidr</type> type holds an IPv4 or IPv6 network specification.
|
|
Input and output formats follow Classless Internet Domain Routing
|
|
conventions.
|
|
The format for specifying networks is <replaceable
|
|
class="parameter">address/y</replaceable> where <replaceable
|
|
class="parameter">address</replaceable> is the network's lowest
|
|
address represented as an
|
|
IPv4 or IPv6 address, and <replaceable
|
|
class="parameter">y</replaceable> is the number of bits in the netmask. If
|
|
<replaceable class="parameter">y</replaceable> is omitted, it is calculated
|
|
using assumptions from the older classful network numbering system, except
|
|
it will be at least large enough to include all of the octets
|
|
written in the input. It is an error to specify a network address
|
|
that has bits set to the right of the specified netmask.
|
|
</para>
|
|
|
|
<para>
|
|
<xref linkend="datatype-net-cidr-table"/> shows some examples.
|
|
</para>
|
|
|
|
<table id="datatype-net-cidr-table">
|
|
<title><type>cidr</type> Type Input Examples</title>
|
|
<tgroup cols="3">
|
|
<thead>
|
|
<row>
|
|
<entry><type>cidr</type> Input</entry>
|
|
<entry><type>cidr</type> Output</entry>
|
|
<entry><literal><function>abbrev(<type>cidr</type>)</function></literal></entry>
|
|
</row>
|
|
</thead>
|
|
<tbody>
|
|
<row>
|
|
<entry>192.168.100.128/25</entry>
|
|
<entry>192.168.100.128/25</entry>
|
|
<entry>192.168.100.128/25</entry>
|
|
</row>
|
|
<row>
|
|
<entry>192.168/24</entry>
|
|
<entry>192.168.0.0/24</entry>
|
|
<entry>192.168.0/24</entry>
|
|
</row>
|
|
<row>
|
|
<entry>192.168/25</entry>
|
|
<entry>192.168.0.0/25</entry>
|
|
<entry>192.168.0.0/25</entry>
|
|
</row>
|
|
<row>
|
|
<entry>192.168.1</entry>
|
|
<entry>192.168.1.0/24</entry>
|
|
<entry>192.168.1/24</entry>
|
|
</row>
|
|
<row>
|
|
<entry>192.168</entry>
|
|
<entry>192.168.0.0/24</entry>
|
|
<entry>192.168.0/24</entry>
|
|
</row>
|
|
<row>
|
|
<entry>128.1</entry>
|
|
<entry>128.1.0.0/16</entry>
|
|
<entry>128.1/16</entry>
|
|
</row>
|
|
<row>
|
|
<entry>128</entry>
|
|
<entry>128.0.0.0/16</entry>
|
|
<entry>128.0/16</entry>
|
|
</row>
|
|
<row>
|
|
<entry>128.1.2</entry>
|
|
<entry>128.1.2.0/24</entry>
|
|
<entry>128.1.2/24</entry>
|
|
</row>
|
|
<row>
|
|
<entry>10.1.2</entry>
|
|
<entry>10.1.2.0/24</entry>
|
|
<entry>10.1.2/24</entry>
|
|
</row>
|
|
<row>
|
|
<entry>10.1</entry>
|
|
<entry>10.1.0.0/16</entry>
|
|
<entry>10.1/16</entry>
|
|
</row>
|
|
<row>
|
|
<entry>10</entry>
|
|
<entry>10.0.0.0/8</entry>
|
|
<entry>10/8</entry>
|
|
</row>
|
|
<row>
|
|
<entry>10.1.2.3/32</entry>
|
|
<entry>10.1.2.3/32</entry>
|
|
<entry>10.1.2.3/32</entry>
|
|
</row>
|
|
<row>
|
|
<entry>2001:4f8:3:ba::/64</entry>
|
|
<entry>2001:4f8:3:ba::/64</entry>
|
|
<entry>2001:4f8:3:ba/64</entry>
|
|
</row>
|
|
<row>
|
|
<entry>2001:4f8:3:ba:&zwsp;2e0:81ff:fe22:d1f1/128</entry>
|
|
<entry>2001:4f8:3:ba:&zwsp;2e0:81ff:fe22:d1f1/128</entry>
|
|
<entry>2001:4f8:3:ba:&zwsp;2e0:81ff:fe22:d1f1/128</entry>
|
|
</row>
|
|
<row>
|
|
<entry>::ffff:1.2.3.0/120</entry>
|
|
<entry>::ffff:1.2.3.0/120</entry>
|
|
<entry>::ffff:1.2.3/120</entry>
|
|
</row>
|
|
<row>
|
|
<entry>::ffff:1.2.3.0/128</entry>
|
|
<entry>::ffff:1.2.3.0/128</entry>
|
|
<entry>::ffff:1.2.3.0/128</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-inet-vs-cidr">
|
|
<title><type>inet</type> vs. <type>cidr</type></title>
|
|
|
|
<para>
|
|
The essential difference between <type>inet</type> and <type>cidr</type>
|
|
data types is that <type>inet</type> accepts values with nonzero bits to
|
|
the right of the netmask, whereas <type>cidr</type> does not. For
|
|
example, <literal>192.168.0.1/24</literal> is valid for <type>inet</type>
|
|
but not for <type>cidr</type>.
|
|
</para>
|
|
|
|
<tip>
|
|
<para>
|
|
If you do not like the output format for <type>inet</type> or
|
|
<type>cidr</type> values, try the functions <function>host</function>,
|
|
<function>text</function>, and <function>abbrev</function>.
|
|
</para>
|
|
</tip>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-macaddr">
|
|
<title><type>macaddr</type></title>
|
|
|
|
<indexterm>
|
|
<primary>macaddr (data type)</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>MAC address</primary>
|
|
<see>macaddr</see>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>macaddr</type> type stores MAC addresses, known for example
|
|
from Ethernet card hardware addresses (although MAC addresses are
|
|
used for other purposes as well). Input is accepted in the
|
|
following formats:
|
|
|
|
<simplelist>
|
|
<member><literal>'08:00:2b:01:02:03'</literal></member>
|
|
<member><literal>'08-00-2b-01-02-03'</literal></member>
|
|
<member><literal>'08002b:010203'</literal></member>
|
|
<member><literal>'08002b-010203'</literal></member>
|
|
<member><literal>'0800.2b01.0203'</literal></member>
|
|
<member><literal>'0800-2b01-0203'</literal></member>
|
|
<member><literal>'08002b010203'</literal></member>
|
|
</simplelist>
|
|
|
|
These examples all specify the same address. Upper and
|
|
lower case is accepted for the digits
|
|
<literal>a</literal> through <literal>f</literal>. Output is always in the
|
|
first of the forms shown.
|
|
</para>
|
|
|
|
<para>
|
|
IEEE Standard 802-2001 specifies the second form shown (with hyphens)
|
|
as the canonical form for MAC addresses, and specifies the first
|
|
form (with colons) as used with bit-reversed, MSB-first notation, so that
|
|
08-00-2b-01-02-03 = 10:00:D4:80:40:C0. This convention is widely
|
|
ignored nowadays, and it is relevant only for obsolete network
|
|
protocols (such as Token Ring). PostgreSQL makes no provisions
|
|
for bit reversal; all accepted formats use the canonical LSB
|
|
order.
|
|
</para>
|
|
|
|
<para>
|
|
The remaining five input formats are not part of any standard.
|
|
</para>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-macaddr8">
|
|
<title><type>macaddr8</type></title>
|
|
|
|
<indexterm>
|
|
<primary>macaddr8 (data type)</primary>
|
|
</indexterm>
|
|
|
|
<indexterm>
|
|
<primary>MAC address (EUI-64 format)</primary>
|
|
<see>macaddr</see>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>macaddr8</type> type stores MAC addresses in EUI-64
|
|
format, known for example from Ethernet card hardware addresses
|
|
(although MAC addresses are used for other purposes as well).
|
|
This type can accept both 6 and 8 byte length MAC addresses
|
|
and stores them in 8 byte length format. MAC addresses given
|
|
in 6 byte format will be stored in 8 byte length format with the
|
|
4th and 5th bytes set to FF and FE, respectively.
|
|
|
|
Note that IPv6 uses a modified EUI-64 format where the 7th bit
|
|
should be set to one after the conversion from EUI-48. The
|
|
function <function>macaddr8_set7bit</function> is provided to make this
|
|
change.
|
|
|
|
Generally speaking, any input which is comprised of pairs of hex
|
|
digits (on byte boundaries), optionally separated consistently by
|
|
one of <literal>':'</literal>, <literal>'-'</literal> or <literal>'.'</literal>, is
|
|
accepted. The number of hex digits must be either 16 (8 bytes) or
|
|
12 (6 bytes). Leading and trailing whitespace is ignored.
|
|
|
|
The following are examples of input formats that are accepted:
|
|
|
|
<simplelist>
|
|
<member><literal>'08:00:2b:01:02:03:04:05'</literal></member>
|
|
<member><literal>'08-00-2b-01-02-03-04-05'</literal></member>
|
|
<member><literal>'08002b:0102030405'</literal></member>
|
|
<member><literal>'08002b-0102030405'</literal></member>
|
|
<member><literal>'0800.2b01.0203.0405'</literal></member>
|
|
<member><literal>'0800-2b01-0203-0405'</literal></member>
|
|
<member><literal>'08002b01:02030405'</literal></member>
|
|
<member><literal>'08002b0102030405'</literal></member>
|
|
</simplelist>
|
|
|
|
These examples all specify the same address. Upper and
|
|
lower case is accepted for the digits
|
|
<literal>a</literal> through <literal>f</literal>. Output is always in the
|
|
first of the forms shown.
|
|
</para>
|
|
|
|
<para>
|
|
The last six input formats shown above are not part of any standard.
|
|
</para>
|
|
|
|
<para>
|
|
To convert a traditional 48 bit MAC address in EUI-48 format to
|
|
modified EUI-64 format to be included as the host portion of an
|
|
IPv6 address, use <function>macaddr8_set7bit</function> as shown:
|
|
|
|
<programlisting>
|
|
SELECT macaddr8_set7bit('08:00:2b:01:02:03');
|
|
<computeroutput>
|
|
macaddr8_set7bit
|
|
-------------------------
|
|
0a:00:2b:ff:fe:01:02:03
|
|
(1 row)
|
|
</computeroutput>
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-bit">
|
|
<title>Bit String Types</title>
|
|
|
|
<indexterm zone="datatype-bit">
|
|
<primary>bit string</primary>
|
|
<secondary>data type</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Bit strings are strings of 1's and 0's. They can be used to store
|
|
or visualize bit masks. There are two SQL bit types:
|
|
<type>bit(<replaceable>n</replaceable>)</type> and <type>bit
|
|
varying(<replaceable>n</replaceable>)</type>, where
|
|
<replaceable>n</replaceable> is a positive integer.
|
|
</para>
|
|
|
|
<para>
|
|
<type>bit</type> type data must match the length
|
|
<replaceable>n</replaceable> exactly; it is an error to attempt to
|
|
store shorter or longer bit strings. <type>bit varying</type> data is
|
|
of variable length up to the maximum length
|
|
<replaceable>n</replaceable>; longer strings will be rejected.
|
|
Writing <type>bit</type> without a length is equivalent to
|
|
<literal>bit(1)</literal>, while <type>bit varying</type> without a length
|
|
specification means unlimited length.
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
If one explicitly casts a bit-string value to
|
|
<type>bit(<replaceable>n</replaceable>)</type>, it will be truncated or
|
|
zero-padded on the right to be exactly <replaceable>n</replaceable> bits,
|
|
without raising an error. Similarly,
|
|
if one explicitly casts a bit-string value to
|
|
<type>bit varying(<replaceable>n</replaceable>)</type>, it will be truncated
|
|
on the right if it is more than <replaceable>n</replaceable> bits.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
Refer to <xref
|
|
linkend="sql-syntax-bit-strings"/> for information about the syntax
|
|
of bit string constants. Bit-logical operators and string
|
|
manipulation functions are available; see <xref
|
|
linkend="functions-bitstring"/>.
|
|
</para>
|
|
|
|
<example>
|
|
<title>Using the Bit String Types</title>
|
|
|
|
<programlisting>
|
|
CREATE TABLE test (a BIT(3), b BIT VARYING(5));
|
|
INSERT INTO test VALUES (B'101', B'00');
|
|
INSERT INTO test VALUES (B'10', B'101');
|
|
<computeroutput>
|
|
ERROR: bit string length 2 does not match type bit(3)
|
|
</computeroutput>
|
|
INSERT INTO test VALUES (B'10'::bit(3), B'101');
|
|
SELECT * FROM test;
|
|
<computeroutput>
|
|
a | b
|
|
-----+-----
|
|
101 | 00
|
|
100 | 101
|
|
</computeroutput>
|
|
</programlisting>
|
|
</example>
|
|
|
|
<para>
|
|
A bit string value requires 1 byte for each group of 8 bits, plus
|
|
5 or 8 bytes overhead depending on the length of the string
|
|
(but long values may be compressed or moved out-of-line, as explained
|
|
in <xref linkend="datatype-character"/> for character strings).
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-textsearch">
|
|
<title>Text Search Types</title>
|
|
|
|
<indexterm zone="datatype-textsearch">
|
|
<primary>full text search</primary>
|
|
<secondary>data types</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-textsearch">
|
|
<primary>text search</primary>
|
|
<secondary>data types</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
<productname>PostgreSQL</productname> provides two data types that
|
|
are designed to support full text search, which is the activity of
|
|
searching through a collection of natural-language <firstterm>documents</firstterm>
|
|
to locate those that best match a <firstterm>query</firstterm>.
|
|
The <type>tsvector</type> type represents a document in a form optimized
|
|
for text search; the <type>tsquery</type> type similarly represents
|
|
a text query.
|
|
<xref linkend="textsearch"/> provides a detailed explanation of this
|
|
facility, and <xref linkend="functions-textsearch"/> summarizes the
|
|
related functions and operators.
|
|
</para>
|
|
|
|
<sect2 id="datatype-tsvector">
|
|
<title><type>tsvector</type></title>
|
|
|
|
<indexterm>
|
|
<primary>tsvector (data type)</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
A <type>tsvector</type> value is a sorted list of distinct
|
|
<firstterm>lexemes</firstterm>, which are words that have been
|
|
<firstterm>normalized</firstterm> to merge different variants of the same word
|
|
(see <xref linkend="textsearch"/> for details). Sorting and
|
|
duplicate-elimination are done automatically during input, as shown in
|
|
this example:
|
|
|
|
<programlisting>
|
|
SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
|
|
tsvector
|
|
----------------------------------------------------
|
|
'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'
|
|
</programlisting>
|
|
|
|
To represent
|
|
lexemes containing whitespace or punctuation, surround them with quotes:
|
|
|
|
<programlisting>
|
|
SELECT $$the lexeme ' ' contains spaces$$::tsvector;
|
|
tsvector
|
|
-------------------------------------------
|
|
' ' 'contains' 'lexeme' 'spaces' 'the'
|
|
</programlisting>
|
|
|
|
(We use dollar-quoted string literals in this example and the next one
|
|
to avoid the confusion of having to double quote marks within the
|
|
literals.) Embedded quotes and backslashes must be doubled:
|
|
|
|
<programlisting>
|
|
SELECT $$the lexeme 'Joe''s' contains a quote$$::tsvector;
|
|
tsvector
|
|
------------------------------------------------
|
|
'Joe''s' 'a' 'contains' 'lexeme' 'quote' 'the'
|
|
</programlisting>
|
|
|
|
Optionally, integer <firstterm>positions</firstterm>
|
|
can be attached to lexemes:
|
|
|
|
<programlisting>
|
|
SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
|
|
tsvector
|
|
-------------------------------------------------------------------&zwsp;------------
|
|
'a':1,6,10 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'on':5 'rat':12 'sat':4
|
|
</programlisting>
|
|
|
|
A position normally indicates the source word's location in the
|
|
document. Positional information can be used for
|
|
<firstterm>proximity ranking</firstterm>. Position values can
|
|
range from 1 to 16383; larger numbers are silently set to 16383.
|
|
Duplicate positions for the same lexeme are discarded.
|
|
</para>
|
|
|
|
<para>
|
|
Lexemes that have positions can further be labeled with a
|
|
<firstterm>weight</firstterm>, which can be <literal>A</literal>,
|
|
<literal>B</literal>, <literal>C</literal>, or <literal>D</literal>.
|
|
<literal>D</literal> is the default and hence is not shown on output:
|
|
|
|
<programlisting>
|
|
SELECT 'a:1A fat:2B,4C cat:5D'::tsvector;
|
|
tsvector
|
|
----------------------------
|
|
'a':1A 'cat':5 'fat':2B,4C
|
|
</programlisting>
|
|
|
|
Weights are typically used to reflect document structure, for example
|
|
by marking title words differently from body words. Text search
|
|
ranking functions can assign different priorities to the different
|
|
weight markers.
|
|
</para>
|
|
|
|
<para>
|
|
It is important to understand that the
|
|
<type>tsvector</type> type itself does not perform any word
|
|
normalization; it assumes the words it is given are normalized
|
|
appropriately for the application. For example,
|
|
|
|
<programlisting>
|
|
SELECT 'The Fat Rats'::tsvector;
|
|
tsvector
|
|
--------------------
|
|
'Fat' 'Rats' 'The'
|
|
</programlisting>
|
|
|
|
For most English-text-searching applications the above words would
|
|
be considered non-normalized, but <type>tsvector</type> doesn't care.
|
|
Raw document text should usually be passed through
|
|
<function>to_tsvector</function> to normalize the words appropriately
|
|
for searching:
|
|
|
|
<programlisting>
|
|
SELECT to_tsvector('english', 'The Fat Rats');
|
|
to_tsvector
|
|
-----------------
|
|
'fat':2 'rat':3
|
|
</programlisting>
|
|
|
|
Again, see <xref linkend="textsearch"/> for more detail.
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-tsquery">
|
|
<title><type>tsquery</type></title>
|
|
|
|
<indexterm>
|
|
<primary>tsquery (data type)</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
A <type>tsquery</type> value stores lexemes that are to be
|
|
searched for, and can combine them using the Boolean operators
|
|
<literal>&</literal> (AND), <literal>|</literal> (OR), and
|
|
<literal>!</literal> (NOT), as well as the phrase search operator
|
|
<literal><-></literal> (FOLLOWED BY). There is also a variant
|
|
<literal><<replaceable>N</replaceable>></literal> of the FOLLOWED BY
|
|
operator, where <replaceable>N</replaceable> is an integer constant that
|
|
specifies the distance between the two lexemes being searched
|
|
for. <literal><-></literal> is equivalent to <literal><1></literal>.
|
|
</para>
|
|
|
|
<para>
|
|
Parentheses can be used to enforce grouping of these operators.
|
|
In the absence of parentheses, <literal>!</literal> (NOT) binds most tightly,
|
|
<literal><-></literal> (FOLLOWED BY) next most tightly, then
|
|
<literal>&</literal> (AND), with <literal>|</literal> (OR) binding
|
|
the least tightly.
|
|
</para>
|
|
|
|
<para>
|
|
Here are some examples:
|
|
|
|
<programlisting>
|
|
SELECT 'fat & rat'::tsquery;
|
|
tsquery
|
|
---------------
|
|
'fat' & 'rat'
|
|
|
|
SELECT 'fat & (rat | cat)'::tsquery;
|
|
tsquery
|
|
---------------------------
|
|
'fat' & ( 'rat' | 'cat' )
|
|
|
|
SELECT 'fat & rat & ! cat'::tsquery;
|
|
tsquery
|
|
------------------------
|
|
'fat' & 'rat' & !'cat'
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
Optionally, lexemes in a <type>tsquery</type> can be labeled with
|
|
one or more weight letters, which restricts them to match only
|
|
<type>tsvector</type> lexemes with one of those weights:
|
|
|
|
<programlisting>
|
|
SELECT 'fat:ab & cat'::tsquery;
|
|
tsquery
|
|
------------------
|
|
'fat':AB & 'cat'
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
Also, lexemes in a <type>tsquery</type> can be labeled with <literal>*</literal>
|
|
to specify prefix matching:
|
|
<programlisting>
|
|
SELECT 'super:*'::tsquery;
|
|
tsquery
|
|
-----------
|
|
'super':*
|
|
</programlisting>
|
|
This query will match any word in a <type>tsvector</type> that begins
|
|
with <quote>super</quote>.
|
|
</para>
|
|
|
|
<para>
|
|
Quoting rules for lexemes are the same as described previously for
|
|
lexemes in <type>tsvector</type>; and, as with <type>tsvector</type>,
|
|
any required normalization of words must be done before converting
|
|
to the <type>tsquery</type> type. The <function>to_tsquery</function>
|
|
function is convenient for performing such normalization:
|
|
|
|
<programlisting>
|
|
SELECT to_tsquery('Fat:ab & Cats');
|
|
to_tsquery
|
|
------------------
|
|
'fat':AB & 'cat'
|
|
</programlisting>
|
|
|
|
Note that <function>to_tsquery</function> will process prefixes in the same way
|
|
as other words, which means this comparison returns true:
|
|
|
|
<programlisting>
|
|
SELECT to_tsvector( 'postgraduate' ) @@ to_tsquery( 'postgres:*' );
|
|
?column?
|
|
----------
|
|
t
|
|
</programlisting>
|
|
because <literal>postgres</literal> gets stemmed to <literal>postgr</literal>:
|
|
<programlisting>
|
|
SELECT to_tsvector( 'postgraduate' ), to_tsquery( 'postgres:*' );
|
|
to_tsvector | to_tsquery
|
|
---------------+------------
|
|
'postgradu':1 | 'postgr':*
|
|
</programlisting>
|
|
which will match the stemmed form of <literal>postgraduate</literal>.
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-uuid">
|
|
<title><acronym>UUID</acronym> Type</title>
|
|
|
|
<indexterm zone="datatype-uuid">
|
|
<primary>UUID</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The data type <type>uuid</type> stores Universally Unique Identifiers
|
|
(UUID) as defined by <ulink url="https://datatracker.ietf.org/doc/html/rfc4122">RFC 4122</ulink>,
|
|
ISO/IEC 9834-8:2005, and related standards.
|
|
(Some systems refer to this data type as a globally unique identifier, or
|
|
GUID,<indexterm><primary>GUID</primary></indexterm> instead.) This
|
|
identifier is a 128-bit quantity that is generated by an algorithm chosen
|
|
to make it very unlikely that the same identifier will be generated by
|
|
anyone else in the known universe using the same algorithm. Therefore,
|
|
for distributed systems, these identifiers provide a better uniqueness
|
|
guarantee than sequence generators, which
|
|
are only unique within a single database.
|
|
</para>
|
|
|
|
<para>
|
|
A UUID is written as a sequence of lower-case hexadecimal digits,
|
|
in several groups separated by hyphens, specifically a group of 8
|
|
digits followed by three groups of 4 digits followed by a group of
|
|
12 digits, for a total of 32 digits representing the 128 bits. An
|
|
example of a UUID in this standard form is:
|
|
<programlisting>
|
|
a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11
|
|
</programlisting>
|
|
<productname>PostgreSQL</productname> also accepts the following
|
|
alternative forms for input:
|
|
use of upper-case digits, the standard format surrounded by
|
|
braces, omitting some or all hyphens, adding a hyphen after any
|
|
group of four digits. Examples are:
|
|
<programlisting>
|
|
A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11
|
|
{a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11}
|
|
a0eebc999c0b4ef8bb6d6bb9bd380a11
|
|
a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11
|
|
{a0eebc99-9c0b4ef8-bb6d6bb9-bd380a11}
|
|
</programlisting>
|
|
Output is always in the standard form.
|
|
</para>
|
|
|
|
<para>
|
|
See <xref linkend="functions-uuid"/> for how to generate a UUID in
|
|
<productname>PostgreSQL</productname>.
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-xml">
|
|
<title><acronym>XML</acronym> Type</title>
|
|
|
|
<indexterm zone="datatype-xml">
|
|
<primary>XML</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>xml</type> data type can be used to store XML data. Its
|
|
advantage over storing XML data in a <type>text</type> field is that it
|
|
checks the input values for well-formedness, and there are support
|
|
functions to perform type-safe operations on it; see <xref
|
|
linkend="functions-xml"/>. Use of this data type requires the
|
|
installation to have been built with <command>configure
|
|
--with-libxml</command>.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>xml</type> type can store well-formed
|
|
<quote>documents</quote>, as defined by the XML standard, as well
|
|
as <quote>content</quote> fragments, which are defined by reference
|
|
to the more permissive
|
|
<ulink url="https://www.w3.org/TR/2010/REC-xpath-datamodel-20101214/#DocumentNode"><quote>document node</quote></ulink>
|
|
of the XQuery and XPath data model.
|
|
Roughly, this means that content fragments can have
|
|
more than one top-level element or character node. The expression
|
|
<literal><replaceable>xmlvalue</replaceable> IS DOCUMENT</literal>
|
|
can be used to evaluate whether a particular <type>xml</type>
|
|
value is a full document or only a content fragment.
|
|
</para>
|
|
|
|
<para>
|
|
Limits and compatibility notes for the <type>xml</type> data type
|
|
can be found in <xref linkend="xml-limits-conformance"/>.
|
|
</para>
|
|
|
|
<sect2 id="datatype-xml-creating">
|
|
<title>Creating XML Values</title>
|
|
<para>
|
|
To produce a value of type <type>xml</type> from character data,
|
|
use the function
|
|
<function>xmlparse</function>:<indexterm><primary>xmlparse</primary></indexterm>
|
|
<synopsis>
|
|
XMLPARSE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable>)
|
|
</synopsis>
|
|
Examples:
|
|
<programlisting><![CDATA[
|
|
XMLPARSE (DOCUMENT '<?xml version="1.0"?><book><title>Manual</title><chapter>...</chapter></book>')
|
|
XMLPARSE (CONTENT 'abc<foo>bar</foo><bar>foo</bar>')
|
|
]]></programlisting>
|
|
While this is the only way to convert character strings into XML
|
|
values according to the SQL standard, the PostgreSQL-specific
|
|
syntaxes:
|
|
<programlisting><![CDATA[
|
|
xml '<foo>bar</foo>'
|
|
'<foo>bar</foo>'::xml
|
|
]]></programlisting>
|
|
can also be used.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>xml</type> type does not validate input values
|
|
against a document type declaration
|
|
(DTD),<indexterm><primary>DTD</primary></indexterm>
|
|
even when the input value specifies a DTD.
|
|
There is also currently no built-in support for validating against
|
|
other XML schema languages such as XML Schema.
|
|
</para>
|
|
|
|
<para>
|
|
The inverse operation, producing a character string value from
|
|
<type>xml</type>, uses the function
|
|
<function>xmlserialize</function>:<indexterm><primary>xmlserialize</primary></indexterm>
|
|
<synopsis>
|
|
XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> [ [ NO ] INDENT ] )
|
|
</synopsis>
|
|
<replaceable>type</replaceable> can be
|
|
<type>character</type>, <type>character varying</type>, or
|
|
<type>text</type> (or an alias for one of those). Again, according
|
|
to the SQL standard, this is the only way to convert between type
|
|
<type>xml</type> and character types, but PostgreSQL also allows
|
|
you to simply cast the value.
|
|
</para>
|
|
|
|
<para>
|
|
The <literal>INDENT</literal> option causes the result to be
|
|
pretty-printed, while <literal>NO INDENT</literal> (which is the
|
|
default) just emits the original input string. Casting to a character
|
|
type likewise produces the original string.
|
|
</para>
|
|
|
|
<para>
|
|
When a character string value is cast to or from type
|
|
<type>xml</type> without going through <type>XMLPARSE</type> or
|
|
<type>XMLSERIALIZE</type>, respectively, the choice of
|
|
<literal>DOCUMENT</literal> versus <literal>CONTENT</literal> is
|
|
determined by the <quote>XML option</quote>
|
|
<indexterm><primary>XML option</primary></indexterm>
|
|
session configuration parameter, which can be set using the
|
|
standard command:
|
|
<synopsis>
|
|
SET XML OPTION { DOCUMENT | CONTENT };
|
|
</synopsis>
|
|
or the more PostgreSQL-like syntax
|
|
<synopsis>
|
|
SET xmloption TO { DOCUMENT | CONTENT };
|
|
</synopsis>
|
|
The default is <literal>CONTENT</literal>, so all forms of XML
|
|
data are allowed.
|
|
</para>
|
|
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-xml-encoding-handling">
|
|
<title>Encoding Handling</title>
|
|
<para>
|
|
Care must be taken when dealing with multiple character encodings
|
|
on the client, server, and in the XML data passed through them.
|
|
When using the text mode to pass queries to the server and query
|
|
results to the client (which is the normal mode), PostgreSQL
|
|
converts all character data passed between the client and the
|
|
server and vice versa to the character encoding of the respective
|
|
end; see <xref linkend="multibyte"/>. This includes string
|
|
representations of XML values, such as in the above examples.
|
|
This would ordinarily mean that encoding declarations contained in
|
|
XML data can become invalid as the character data is converted
|
|
to other encodings while traveling between client and server,
|
|
because the embedded encoding declaration is not changed. To cope
|
|
with this behavior, encoding declarations contained in
|
|
character strings presented for input to the <type>xml</type> type
|
|
are <emphasis>ignored</emphasis>, and content is assumed
|
|
to be in the current server encoding. Consequently, for correct
|
|
processing, character strings of XML data must be sent
|
|
from the client in the current client encoding. It is the
|
|
responsibility of the client to either convert documents to the
|
|
current client encoding before sending them to the server, or to
|
|
adjust the client encoding appropriately. On output, values of
|
|
type <type>xml</type> will not have an encoding declaration, and
|
|
clients should assume all data is in the current client
|
|
encoding.
|
|
</para>
|
|
|
|
<para>
|
|
When using binary mode to pass query parameters to the server
|
|
and query results back to the client, no encoding conversion
|
|
is performed, so the situation is different. In this case, an
|
|
encoding declaration in the XML data will be observed, and if it
|
|
is absent, the data will be assumed to be in UTF-8 (as required by
|
|
the XML standard; note that PostgreSQL does not support UTF-16).
|
|
On output, data will have an encoding declaration
|
|
specifying the client encoding, unless the client encoding is
|
|
UTF-8, in which case it will be omitted.
|
|
</para>
|
|
|
|
<para>
|
|
Needless to say, processing XML data with PostgreSQL will be less
|
|
error-prone and more efficient if the XML data encoding, client encoding,
|
|
and server encoding are the same. Since XML data is internally
|
|
processed in UTF-8, computations will be most efficient if the
|
|
server encoding is also UTF-8.
|
|
</para>
|
|
|
|
<caution>
|
|
<para>
|
|
Some XML-related functions may not work at all on non-ASCII data
|
|
when the server encoding is not UTF-8. This is known to be an
|
|
issue for <function>xmltable()</function> and <function>xpath()</function> in particular.
|
|
</para>
|
|
</caution>
|
|
</sect2>
|
|
|
|
<sect2 id="datatype-xml-accessing-xml-values">
|
|
<title>Accessing XML Values</title>
|
|
|
|
<para>
|
|
The <type>xml</type> data type is unusual in that it does not
|
|
provide any comparison operators. This is because there is no
|
|
well-defined and universally useful comparison algorithm for XML
|
|
data. One consequence of this is that you cannot retrieve rows by
|
|
comparing an <type>xml</type> column against a search value. XML
|
|
values should therefore typically be accompanied by a separate key
|
|
field such as an ID. An alternative solution for comparing XML
|
|
values is to convert them to character strings first, but note
|
|
that character string comparison has little to do with a useful
|
|
XML comparison method.
|
|
</para>
|
|
|
|
<para>
|
|
Since there are no comparison operators for the <type>xml</type>
|
|
data type, it is not possible to create an index directly on a
|
|
column of this type. If speedy searches in XML data are desired,
|
|
possible workarounds include casting the expression to a
|
|
character string type and indexing that, or indexing an XPath
|
|
expression. Of course, the actual query would have to be adjusted
|
|
to search by the indexed expression.
|
|
</para>
|
|
|
|
<para>
|
|
The text-search functionality in PostgreSQL can also be used to speed
|
|
up full-document searches of XML data. The necessary
|
|
preprocessing support is, however, not yet available in the PostgreSQL
|
|
distribution.
|
|
</para>
|
|
</sect2>
|
|
</sect1>
|
|
|
|
&json;
|
|
|
|
&array;
|
|
|
|
&rowtypes;
|
|
|
|
&rangetypes;
|
|
|
|
<sect1 id="domains">
|
|
<title>Domain Types</title>
|
|
|
|
<indexterm zone="domains">
|
|
<primary>domain</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="domains">
|
|
<primary>data type</primary>
|
|
<secondary>domain</secondary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
A <firstterm>domain</firstterm> is a user-defined data type that is
|
|
based on another <firstterm>underlying type</firstterm>. Optionally,
|
|
it can have constraints that restrict its valid values to a subset of
|
|
what the underlying type would allow. Otherwise it behaves like the
|
|
underlying type — for example, any operator or function that
|
|
can be applied to the underlying type will work on the domain type.
|
|
The underlying type can be any built-in or user-defined base type,
|
|
enum type, array type, composite type, range type, or another domain.
|
|
</para>
|
|
|
|
<para>
|
|
For example, we could create a domain over integers that accepts only
|
|
positive integers:
|
|
<programlisting>
|
|
CREATE DOMAIN posint AS integer CHECK (VALUE > 0);
|
|
CREATE TABLE mytable (id posint);
|
|
INSERT INTO mytable VALUES(1); -- works
|
|
INSERT INTO mytable VALUES(-1); -- fails
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
When an operator or function of the underlying type is applied to a
|
|
domain value, the domain is automatically down-cast to the underlying
|
|
type. Thus, for example, the result of <literal>mytable.id - 1</literal>
|
|
is considered to be of type <type>integer</type> not <type>posint</type>.
|
|
We could write <literal>(mytable.id - 1)::posint</literal> to cast the
|
|
result back to <type>posint</type>, causing the domain's constraints
|
|
to be rechecked. In this case, that would result in an error if the
|
|
expression had been applied to an <structfield>id</structfield> value of
|
|
1. Assigning a value of the underlying type to a field or variable of
|
|
the domain type is allowed without writing an explicit cast, but the
|
|
domain's constraints will be checked.
|
|
</para>
|
|
|
|
<para>
|
|
For additional information see <xref linkend="sql-createdomain"/>.
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-oid">
|
|
<title>Object Identifier Types</title>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>object identifier</primary>
|
|
<secondary>data type</secondary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>oid</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regclass</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regcollation</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regconfig</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regdictionary</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regnamespace</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regoper</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regoperator</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regproc</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regprocedure</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regrole</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>regtype</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>xid8</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>cid</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>tid</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-oid">
|
|
<primary>xid</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
Object identifiers (OIDs) are used internally by
|
|
<productname>PostgreSQL</productname> as primary keys for various
|
|
system tables.
|
|
Type <type>oid</type> represents an object identifier. There are also
|
|
several alias types for <type>oid</type>, each
|
|
named <type>reg<replaceable>something</replaceable></type>.
|
|
<xref linkend="datatype-oid-table"/> shows an
|
|
overview.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>oid</type> type is currently implemented as an unsigned
|
|
four-byte integer. Therefore, it is not large enough to provide
|
|
database-wide uniqueness in large databases, or even in large
|
|
individual tables.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>oid</type> type itself has few operations beyond comparison.
|
|
It can be cast to integer, however, and then manipulated using the
|
|
standard integer operators. (Beware of possible
|
|
signed-versus-unsigned confusion if you do this.)
|
|
</para>
|
|
|
|
<para>
|
|
The OID alias types have no operations of their own except
|
|
for specialized input and output routines. These routines are able
|
|
to accept and display symbolic names for system objects, rather than
|
|
the raw numeric value that type <type>oid</type> would use. The alias
|
|
types allow simplified lookup of OID values for objects. For example,
|
|
to examine the <structname>pg_attribute</structname> rows related to a table
|
|
<literal>mytable</literal>, one could write:
|
|
<programlisting>
|
|
SELECT * FROM pg_attribute WHERE attrelid = 'mytable'::regclass;
|
|
</programlisting>
|
|
rather than:
|
|
<programlisting>
|
|
SELECT * FROM pg_attribute
|
|
WHERE attrelid = (SELECT oid FROM pg_class WHERE relname = 'mytable');
|
|
</programlisting>
|
|
While that doesn't look all that bad by itself, it's still oversimplified.
|
|
A far more complicated sub-select would be needed to
|
|
select the right OID if there are multiple tables named
|
|
<literal>mytable</literal> in different schemas.
|
|
The <type>regclass</type> input converter handles the table lookup according
|
|
to the schema path setting, and so it does the <quote>right thing</quote>
|
|
automatically. Similarly, casting a table's OID to
|
|
<type>regclass</type> is handy for symbolic display of a numeric OID.
|
|
</para>
|
|
|
|
<table id="datatype-oid-table">
|
|
<title>Object Identifier Types</title>
|
|
<tgroup cols="4">
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>References</entry>
|
|
<entry>Description</entry>
|
|
<entry>Value Example</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
|
|
<row>
|
|
<entry><type>oid</type></entry>
|
|
<entry>any</entry>
|
|
<entry>numeric object identifier</entry>
|
|
<entry><literal>564182</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regclass</type></entry>
|
|
<entry><structname>pg_class</structname></entry>
|
|
<entry>relation name</entry>
|
|
<entry><literal>pg_type</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regcollation</type></entry>
|
|
<entry><structname>pg_collation</structname></entry>
|
|
<entry>collation name</entry>
|
|
<entry><literal>"POSIX"</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regconfig</type></entry>
|
|
<entry><structname>pg_ts_config</structname></entry>
|
|
<entry>text search configuration</entry>
|
|
<entry><literal>english</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regdictionary</type></entry>
|
|
<entry><structname>pg_ts_dict</structname></entry>
|
|
<entry>text search dictionary</entry>
|
|
<entry><literal>simple</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regnamespace</type></entry>
|
|
<entry><structname>pg_namespace</structname></entry>
|
|
<entry>namespace name</entry>
|
|
<entry><literal>pg_catalog</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regoper</type></entry>
|
|
<entry><structname>pg_operator</structname></entry>
|
|
<entry>operator name</entry>
|
|
<entry><literal>+</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regoperator</type></entry>
|
|
<entry><structname>pg_operator</structname></entry>
|
|
<entry>operator with argument types</entry>
|
|
<entry><literal>*(integer,&zwsp;integer)</literal>
|
|
or <literal>-(NONE,&zwsp;integer)</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regproc</type></entry>
|
|
<entry><structname>pg_proc</structname></entry>
|
|
<entry>function name</entry>
|
|
<entry><literal>sum</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regprocedure</type></entry>
|
|
<entry><structname>pg_proc</structname></entry>
|
|
<entry>function with argument types</entry>
|
|
<entry><literal>sum(int4)</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regrole</type></entry>
|
|
<entry><structname>pg_authid</structname></entry>
|
|
<entry>role name</entry>
|
|
<entry><literal>smithee</literal></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>regtype</type></entry>
|
|
<entry><structname>pg_type</structname></entry>
|
|
<entry>data type name</entry>
|
|
<entry><literal>integer</literal></entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
All of the OID alias types for objects that are grouped by namespace
|
|
accept schema-qualified names, and will
|
|
display schema-qualified names on output if the object would not
|
|
be found in the current search path without being qualified.
|
|
For example, <literal>myschema.mytable</literal> is acceptable input
|
|
for <type>regclass</type> (if there is such a table). That value
|
|
might be output as <literal>myschema.mytable</literal>, or
|
|
just <literal>mytable</literal>, depending on the current search path.
|
|
The <type>regproc</type> and <type>regoper</type> alias types will only
|
|
accept input names that are unique (not overloaded), so they are
|
|
of limited use; for most uses <type>regprocedure</type> or
|
|
<type>regoperator</type> are more appropriate. For <type>regoperator</type>,
|
|
unary operators are identified by writing <literal>NONE</literal> for the unused
|
|
operand.
|
|
</para>
|
|
|
|
<para>
|
|
The input functions for these types allow whitespace between tokens,
|
|
and will fold upper-case letters to lower case, except within double
|
|
quotes; this is done to make the syntax rules similar to the way
|
|
object names are written in SQL. Conversely, the output functions
|
|
will use double quotes if needed to make the output be a valid SQL
|
|
identifier. For example, the OID of a function
|
|
named <literal>Foo</literal> (with upper case <literal>F</literal>)
|
|
taking two integer arguments could be entered as
|
|
<literal>' "Foo" ( int, integer ) '::regprocedure</literal>. The
|
|
output would look like <literal>"Foo"(integer,integer)</literal>.
|
|
Both the function name and the argument type names could be
|
|
schema-qualified, too.
|
|
</para>
|
|
|
|
<para>
|
|
Many built-in <productname>PostgreSQL</productname> functions accept
|
|
the OID of a table, or another kind of database object, and for
|
|
convenience are declared as taking <type>regclass</type> (or the
|
|
appropriate OID alias type). This means you do not have to look up
|
|
the object's OID by hand, but can just enter its name as a string
|
|
literal. For example, the <function>nextval(regclass)</function> function
|
|
takes a sequence relation's OID, so you could call it like this:
|
|
<programlisting>
|
|
nextval('foo') <lineannotation>operates on sequence <literal>foo</literal></lineannotation>
|
|
nextval('FOO') <lineannotation>same as above</lineannotation>
|
|
nextval('"Foo"') <lineannotation>operates on sequence <literal>Foo</literal></lineannotation>
|
|
nextval('myschema.foo') <lineannotation>operates on <literal>myschema.foo</literal></lineannotation>
|
|
nextval('"myschema".foo') <lineannotation>same as above</lineannotation>
|
|
nextval('foo') <lineannotation>searches search path for <literal>foo</literal></lineannotation>
|
|
</programlisting>
|
|
</para>
|
|
|
|
<note>
|
|
<para>
|
|
When you write the argument of such a function as an unadorned
|
|
literal string, it becomes a constant of type <type>regclass</type>
|
|
(or the appropriate type).
|
|
Since this is really just an OID, it will track the originally
|
|
identified object despite later renaming, schema reassignment,
|
|
etc. This <quote>early binding</quote> behavior is usually desirable for
|
|
object references in column defaults and views. But sometimes you might
|
|
want <quote>late binding</quote> where the object reference is resolved
|
|
at run time. To get late-binding behavior, force the constant to be
|
|
stored as a <type>text</type> constant instead of <type>regclass</type>:
|
|
<programlisting>
|
|
nextval('foo'::text) <lineannotation><literal>foo</literal> is looked up at runtime</lineannotation>
|
|
</programlisting>
|
|
The <function>to_regclass()</function> function and its siblings
|
|
can also be used to perform run-time lookups. See
|
|
<xref linkend="functions-info-catalog-table"/>.
|
|
</para>
|
|
</note>
|
|
|
|
<para>
|
|
Another practical example of use of <type>regclass</type>
|
|
is to look up the OID of a table listed in
|
|
the <literal>information_schema</literal> views, which don't supply
|
|
such OIDs directly. One might for example wish to call
|
|
the <function>pg_relation_size()</function> function, which requires
|
|
the table OID. Taking the above rules into account, the correct way
|
|
to do that is
|
|
<programlisting>
|
|
SELECT table_schema, table_name,
|
|
pg_relation_size((quote_ident(table_schema) || '.' ||
|
|
quote_ident(table_name))::regclass)
|
|
FROM information_schema.tables
|
|
WHERE ...
|
|
</programlisting>
|
|
The <function>quote_ident()</function> function will take care of
|
|
double-quoting the identifiers where needed. The seemingly easier
|
|
<programlisting>
|
|
SELECT pg_relation_size(table_name)
|
|
FROM information_schema.tables
|
|
WHERE ...
|
|
</programlisting>
|
|
is <emphasis>not recommended</emphasis>, because it will fail for
|
|
tables that are outside your search path or have names that require
|
|
quoting.
|
|
</para>
|
|
|
|
<para>
|
|
An additional property of most of the OID alias types is the creation of
|
|
dependencies. If a
|
|
constant of one of these types appears in a stored expression
|
|
(such as a column default expression or view), it creates a dependency
|
|
on the referenced object. For example, if a column has a default
|
|
expression <literal>nextval('my_seq'::regclass)</literal>,
|
|
<productname>PostgreSQL</productname>
|
|
understands that the default expression depends on the sequence
|
|
<literal>my_seq</literal>, so the system will not let the sequence
|
|
be dropped without first removing the default expression. The
|
|
alternative of <literal>nextval('my_seq'::text)</literal> does not
|
|
create a dependency.
|
|
(<type>regrole</type> is an exception to this property. Constants of this
|
|
type are not allowed in stored expressions.)
|
|
</para>
|
|
|
|
<para>
|
|
Another identifier type used by the system is <type>xid</type>, or transaction
|
|
(abbreviated <abbrev>xact</abbrev>) identifier. This is the data type of the system columns
|
|
<structfield>xmin</structfield> and <structfield>xmax</structfield>. Transaction identifiers are 32-bit quantities.
|
|
In some contexts, a 64-bit variant <type>xid8</type> is used. Unlike
|
|
<type>xid</type> values, <type>xid8</type> values increase strictly
|
|
monotonically and cannot be reused in the lifetime of a database
|
|
cluster. See <xref linkend="transaction-id"/> for more details.
|
|
</para>
|
|
|
|
<para>
|
|
A third identifier type used by the system is <type>cid</type>, or
|
|
command identifier. This is the data type of the system columns
|
|
<structfield>cmin</structfield> and <structfield>cmax</structfield>. Command identifiers are also 32-bit quantities.
|
|
</para>
|
|
|
|
<para>
|
|
A final identifier type used by the system is <type>tid</type>, or tuple
|
|
identifier (row identifier). This is the data type of the system column
|
|
<structfield>ctid</structfield>. A tuple ID is a pair
|
|
(block number, tuple index within block) that identifies the
|
|
physical location of the row within its table.
|
|
</para>
|
|
|
|
<para>
|
|
(The system columns are further explained in <xref
|
|
linkend="ddl-system-columns"/>.)
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-pg-lsn">
|
|
<title><type>pg_lsn</type> Type</title>
|
|
|
|
<indexterm zone="datatype-pg-lsn">
|
|
<primary>pg_lsn</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <type>pg_lsn</type> data type can be used to store LSN (Log Sequence
|
|
Number) data which is a pointer to a location in the WAL. This type is a
|
|
representation of <type>XLogRecPtr</type> and an internal system type of
|
|
<productname>PostgreSQL</productname>.
|
|
</para>
|
|
|
|
<para>
|
|
Internally, an LSN is a 64-bit integer, representing a byte position in
|
|
the write-ahead log stream. It is printed as two hexadecimal numbers of
|
|
up to 8 digits each, separated by a slash; for example,
|
|
<literal>16/B374D848</literal>. The <type>pg_lsn</type> type supports the
|
|
standard comparison operators, like <literal>=</literal> and
|
|
<literal>></literal>. Two LSNs can be subtracted using the
|
|
<literal>-</literal> operator; the result is the number of bytes separating
|
|
those write-ahead log locations. Also the number of bytes can be
|
|
added into and subtracted from LSN using the
|
|
<literal>+(pg_lsn,numeric)</literal> and
|
|
<literal>-(pg_lsn,numeric)</literal> operators, respectively. Note that
|
|
the calculated LSN should be in the range of <type>pg_lsn</type> type,
|
|
i.e., between <literal>0/0</literal> and
|
|
<literal>FFFFFFFF/FFFFFFFF</literal>.
|
|
</para>
|
|
</sect1>
|
|
|
|
<sect1 id="datatype-pseudo">
|
|
<title>Pseudo-Types</title>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>record</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>any</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anyelement</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anyarray</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anynonarray</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anyenum</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anyrange</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anymultirange</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anycompatible</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anycompatiblearray</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anycompatiblenonarray</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anycompatiblerange</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>anycompatiblemultirange</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>void</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>trigger</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>event_trigger</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>pg_ddl_command</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>language_handler</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>fdw_handler</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>table_am_handler</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>index_am_handler</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>tsm_handler</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>cstring</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>internal</primary>
|
|
</indexterm>
|
|
|
|
<indexterm zone="datatype-pseudo">
|
|
<primary>unknown</primary>
|
|
</indexterm>
|
|
|
|
<para>
|
|
The <productname>PostgreSQL</productname> type system contains a
|
|
number of special-purpose entries that are collectively called
|
|
<firstterm>pseudo-types</firstterm>. A pseudo-type cannot be used as a
|
|
column data type, but it can be used to declare a function's
|
|
argument or result type. Each of the available pseudo-types is
|
|
useful in situations where a function's behavior does not
|
|
correspond to simply taking or returning a value of a specific
|
|
<acronym>SQL</acronym> data type. <xref
|
|
linkend="datatype-pseudotypes-table"/> lists the existing
|
|
pseudo-types.
|
|
</para>
|
|
|
|
<table id="datatype-pseudotypes-table">
|
|
<title>Pseudo-Types</title>
|
|
<tgroup cols="2">
|
|
<colspec colname="col1" colwidth="2*"/>
|
|
<colspec colname="col2" colwidth="3*"/>
|
|
<thead>
|
|
<row>
|
|
<entry>Name</entry>
|
|
<entry>Description</entry>
|
|
</row>
|
|
</thead>
|
|
|
|
<tbody>
|
|
<row>
|
|
<entry><type>any</type></entry>
|
|
<entry>Indicates that a function accepts any input data type.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anyelement</type></entry>
|
|
<entry>Indicates that a function accepts any data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anyarray</type></entry>
|
|
<entry>Indicates that a function accepts any array data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anynonarray</type></entry>
|
|
<entry>Indicates that a function accepts any non-array data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anyenum</type></entry>
|
|
<entry>Indicates that a function accepts any enum data type
|
|
(see <xref linkend="extend-types-polymorphic"/> and
|
|
<xref linkend="datatype-enum"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anyrange</type></entry>
|
|
<entry>Indicates that a function accepts any range data type
|
|
(see <xref linkend="extend-types-polymorphic"/> and
|
|
<xref linkend="rangetypes"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anymultirange</type></entry>
|
|
<entry>Indicates that a function accepts any multirange data type
|
|
(see <xref linkend="extend-types-polymorphic"/> and
|
|
<xref linkend="rangetypes"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anycompatible</type></entry>
|
|
<entry>Indicates that a function accepts any data type,
|
|
with automatic promotion of multiple arguments to a common data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anycompatiblearray</type></entry>
|
|
<entry>Indicates that a function accepts any array data type,
|
|
with automatic promotion of multiple arguments to a common data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anycompatiblenonarray</type></entry>
|
|
<entry>Indicates that a function accepts any non-array data type,
|
|
with automatic promotion of multiple arguments to a common data type
|
|
(see <xref linkend="extend-types-polymorphic"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anycompatiblerange</type></entry>
|
|
<entry>Indicates that a function accepts any range data type,
|
|
with automatic promotion of multiple arguments to a common data type
|
|
(see <xref linkend="extend-types-polymorphic"/> and
|
|
<xref linkend="rangetypes"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>anycompatiblemultirange</type></entry>
|
|
<entry>Indicates that a function accepts any multirange data type,
|
|
with automatic promotion of multiple arguments to a common data type
|
|
(see <xref linkend="extend-types-polymorphic"/> and
|
|
<xref linkend="rangetypes"/>).</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>cstring</type></entry>
|
|
<entry>Indicates that a function accepts or returns a null-terminated C string.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>internal</type></entry>
|
|
<entry>Indicates that a function accepts or returns a server-internal
|
|
data type.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>language_handler</type></entry>
|
|
<entry>A procedural language call handler is declared to return <type>language_handler</type>.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>fdw_handler</type></entry>
|
|
<entry>A foreign-data wrapper handler is declared to return <type>fdw_handler</type>.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>table_am_handler</type></entry>
|
|
<entry>A table access method handler is declared to return <type>table_am_handler</type>.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>index_am_handler</type></entry>
|
|
<entry>An index access method handler is declared to return <type>index_am_handler</type>.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>tsm_handler</type></entry>
|
|
<entry>A tablesample method handler is declared to return <type>tsm_handler</type>.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>record</type></entry>
|
|
<entry>Identifies a function taking or returning an unspecified row type.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>trigger</type></entry>
|
|
<entry>A trigger function is declared to return <type>trigger.</type></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>event_trigger</type></entry>
|
|
<entry>An event trigger function is declared to return <type>event_trigger.</type></entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>pg_ddl_command</type></entry>
|
|
<entry>Identifies a representation of DDL commands that is available to event triggers.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>void</type></entry>
|
|
<entry>Indicates that a function returns no value.</entry>
|
|
</row>
|
|
|
|
<row>
|
|
<entry><type>unknown</type></entry>
|
|
<entry>Identifies a not-yet-resolved type, e.g., of an undecorated
|
|
string literal.</entry>
|
|
</row>
|
|
</tbody>
|
|
</tgroup>
|
|
</table>
|
|
|
|
<para>
|
|
Functions coded in C (whether built-in or dynamically loaded) can be
|
|
declared to accept or return any of these pseudo-types. It is up to
|
|
the function author to ensure that the function will behave safely
|
|
when a pseudo-type is used as an argument type.
|
|
</para>
|
|
|
|
<para>
|
|
Functions coded in procedural languages can use pseudo-types only as
|
|
allowed by their implementation languages. At present most procedural
|
|
languages forbid use of a pseudo-type as an argument type, and allow
|
|
only <type>void</type> and <type>record</type> as a result type (plus
|
|
<type>trigger</type> or <type>event_trigger</type> when the function is used
|
|
as a trigger or event trigger). Some also support polymorphic functions
|
|
using the polymorphic pseudo-types, which are shown above and discussed
|
|
in detail in <xref linkend="extend-types-polymorphic"/>.
|
|
</para>
|
|
|
|
<para>
|
|
The <type>internal</type> pseudo-type is used to declare functions
|
|
that are meant only to be called internally by the database
|
|
system, and not by direct invocation in an <acronym>SQL</acronym>
|
|
query. If a function has at least one <type>internal</type>-type
|
|
argument then it cannot be called from <acronym>SQL</acronym>. To
|
|
preserve the type safety of this restriction it is important to
|
|
follow this coding rule: do not create any function that is
|
|
declared to return <type>internal</type> unless it has at least one
|
|
<type>internal</type> argument.
|
|
</para>
|
|
|
|
</sect1>
|
|
|
|
</chapter>
|