1999-07-22 17:09:15 +02:00
|
|
|
<!--
|
2010-09-20 22:08:53 +02:00
|
|
|
doc/src/sgml/ref/create_type.sgml
|
2001-12-08 04:24:40 +01:00
|
|
|
PostgreSQL documentation
|
1999-07-22 17:09:15 +02:00
|
|
|
-->
|
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refentry id="sql-createtype">
|
2014-02-24 03:25:35 +01:00
|
|
|
<indexterm zone="sql-createtype">
|
|
|
|
<primary>CREATE TYPE</primary>
|
|
|
|
</indexterm>
|
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refmeta>
|
2010-04-03 09:23:02 +02:00
|
|
|
<refentrytitle>CREATE TYPE</refentrytitle>
|
2008-11-14 11:22:48 +01:00
|
|
|
<manvolnum>7</manvolnum>
|
1999-07-06 19:16:42 +02:00
|
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
|
|
</refmeta>
|
2003-04-22 12:08:08 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refnamediv>
|
2003-04-22 12:08:08 +02:00
|
|
|
<refname>CREATE TYPE</refname>
|
|
|
|
<refpurpose>define a new data type</refpurpose>
|
1998-12-29 03:24:47 +01:00
|
|
|
</refnamediv>
|
2003-04-22 12:08:08 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<refsynopsisdiv>
|
2003-04-22 12:08:08 +02:00
|
|
|
<synopsis>
|
2003-09-22 02:16:58 +02:00
|
|
|
CREATE TYPE <replaceable class="parameter">name</replaceable> AS
|
2017-10-09 04:00:57 +02:00
|
|
|
( [ <replaceable class="parameter">attribute_name</replaceable> <replaceable class="parameter">data_type</replaceable> [ COLLATE <replaceable>collation</replaceable> ] [, ... ] ] )
|
2003-05-09 00:19:58 +02:00
|
|
|
|
2007-04-02 05:49:42 +02:00
|
|
|
CREATE TYPE <replaceable class="parameter">name</replaceable> AS ENUM
|
2009-12-26 17:55:21 +01:00
|
|
|
( [ '<replaceable class="parameter">label</replaceable>' [, ... ] ] )
|
2007-04-02 05:49:42 +02:00
|
|
|
|
2011-11-03 12:16:28 +01:00
|
|
|
CREATE TYPE <replaceable class="parameter">name</replaceable> AS RANGE (
|
2011-11-19 00:23:55 +01:00
|
|
|
SUBTYPE = <replaceable class="parameter">subtype</replaceable>
|
2011-11-03 12:16:28 +01:00
|
|
|
[ , SUBTYPE_OPCLASS = <replaceable class="parameter">subtype_operator_class</replaceable> ]
|
2011-11-19 00:23:55 +01:00
|
|
|
[ , COLLATION = <replaceable class="parameter">collation</replaceable> ]
|
2011-11-03 12:16:28 +01:00
|
|
|
[ , CANONICAL = <replaceable class="parameter">canonical_function</replaceable> ]
|
2011-11-19 00:23:55 +01:00
|
|
|
[ , SUBTYPE_DIFF = <replaceable class="parameter">subtype_diff_function</replaceable> ]
|
Multirange datatypes
Multiranges are basically sorted arrays of non-overlapping ranges with
set-theoretic operations defined over them.
Since v14, each range type automatically gets a corresponding multirange
datatype. There are both manual and automatic mechanisms for naming multirange
types. One can specify the multirange type name using the multirange_type_name
attribute in CREATE TYPE. Otherwise, a multirange type name is generated
automatically. If the range type name contains "range" then we change that to
"multirange". Otherwise, we add "_multirange" to the end.
Implementation of multiranges comes with a space-efficient internal
representation format, which evades extra paddings and duplicated storage of
oids. Altogether this format allows fetching a particular range by its index
in O(n).
Statistic gathering and selectivity estimation are implemented for multiranges.
For this purpose, stored multirange is approximated as union range without gaps.
This field will likely need improvements in the future.
Catversion is bumped.
Discussion: https://postgr.es/m/CALNJ-vSUpQ_Y%3DjXvTxt1VYFztaBSsWVXeF1y6gTYQ4bOiWDLgQ%40mail.gmail.com
Discussion: https://postgr.es/m/a0b8026459d1e6167933be2104a6174e7d40d0ab.camel%40j-davis.com#fe7218c83b08068bfffb0c5293eceda0
Author: Paul Jungwirth, revised by me
Reviewed-by: David Fetter, Corey Huinker, Jeff Davis, Pavel Stehule
Reviewed-by: Alvaro Herrera, Tom Lane, Isaac Morland, David G. Johnston
Reviewed-by: Zhihong Yu, Alexander Korotkov
2020-12-20 05:20:33 +01:00
|
|
|
[ , MULTIRANGE_TYPE_NAME = <replaceable class="parameter">multirange_type_name</replaceable> ]
|
2011-11-03 12:16:28 +01:00
|
|
|
)
|
|
|
|
|
2003-09-22 02:16:58 +02:00
|
|
|
CREATE TYPE <replaceable class="parameter">name</replaceable> (
|
2003-05-09 00:19:58 +02:00
|
|
|
INPUT = <replaceable class="parameter">input_function</replaceable>,
|
|
|
|
OUTPUT = <replaceable class="parameter">output_function</replaceable>
|
|
|
|
[ , RECEIVE = <replaceable class="parameter">receive_function</replaceable> ]
|
|
|
|
[ , SEND = <replaceable class="parameter">send_function</replaceable> ]
|
2006-12-30 22:21:56 +01:00
|
|
|
[ , TYPMOD_IN = <replaceable class="parameter">type_modifier_input_function</replaceable> ]
|
|
|
|
[ , TYPMOD_OUT = <replaceable class="parameter">type_modifier_output_function</replaceable> ]
|
2004-02-13 00:41:04 +01:00
|
|
|
[ , ANALYZE = <replaceable class="parameter">analyze_function</replaceable> ]
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
[ , SUBSCRIPT = <replaceable class="parameter">subscript_function</replaceable> ]
|
2003-05-09 00:19:58 +02:00
|
|
|
[ , INTERNALLENGTH = { <replaceable class="parameter">internallength</replaceable> | VARIABLE } ]
|
2000-08-25 01:36:29 +02:00
|
|
|
[ , PASSEDBYVALUE ]
|
|
|
|
[ , ALIGNMENT = <replaceable class="parameter">alignment</replaceable> ]
|
|
|
|
[ , STORAGE = <replaceable class="parameter">storage</replaceable> ]
|
2008-11-30 20:01:29 +01:00
|
|
|
[ , LIKE = <replaceable class="parameter">like_type</replaceable> ]
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
[ , CATEGORY = <replaceable class="parameter">category</replaceable> ]
|
|
|
|
[ , PREFERRED = <replaceable class="parameter">preferred</replaceable> ]
|
2003-05-09 00:19:58 +02:00
|
|
|
[ , DEFAULT = <replaceable class="parameter">default</replaceable> ]
|
|
|
|
[ , ELEMENT = <replaceable class="parameter">element</replaceable> ]
|
|
|
|
[ , DELIMITER = <replaceable class="parameter">delimiter</replaceable> ]
|
2011-02-08 22:04:18 +01:00
|
|
|
[ , COLLATABLE = <replaceable class="parameter">collatable</replaceable> ]
|
2000-08-25 01:36:29 +02:00
|
|
|
)
|
2006-02-28 23:37:27 +01:00
|
|
|
|
|
|
|
CREATE TYPE <replaceable class="parameter">name</replaceable>
|
2003-04-22 12:08:08 +02:00
|
|
|
</synopsis>
|
1999-07-06 19:16:42 +02:00
|
|
|
</refsynopsisdiv>
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<refsect1>
|
|
|
|
<title>Description</title>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
<command>CREATE TYPE</command> registers a new data type for use in
|
2005-01-04 01:39:53 +01:00
|
|
|
the current database. The user who defines a type becomes its
|
2003-04-22 12:08:08 +02:00
|
|
|
owner.
|
2002-04-23 04:07:16 +02:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
If a schema name is given then the type is created in the specified
|
|
|
|
schema. Otherwise it is created in the current schema. The type
|
|
|
|
name must be distinct from the name of any existing type or domain
|
|
|
|
in the same schema. (Because tables have associated data types,
|
|
|
|
the type name must also be distinct from the name of any existing
|
|
|
|
table in the same schema.)
|
1998-07-29 08:23:26 +02:00
|
|
|
</para>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
|
|
|
There are five forms of <command>CREATE TYPE</command>, as shown in the
|
|
|
|
syntax synopsis above. They respectively create a <firstterm>composite
|
2017-10-09 03:44:17 +02:00
|
|
|
type</firstterm>, an <firstterm>enum type</firstterm>, a <firstterm>range type</firstterm>, a
|
|
|
|
<firstterm>base type</firstterm>, or a <firstterm>shell type</firstterm>. The first four
|
2011-11-19 00:23:55 +01:00
|
|
|
of these are discussed in turn below. A shell type is simply a placeholder
|
|
|
|
for a type to be defined later; it is created by issuing <command>CREATE
|
|
|
|
TYPE</command> with no parameters except for the type name. Shell types
|
|
|
|
are needed as forward references when creating range types and base types,
|
|
|
|
as discussed in those sections.
|
|
|
|
</para>
|
|
|
|
|
2003-05-09 00:19:58 +02:00
|
|
|
<refsect2>
|
|
|
|
<title>Composite Types</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The first form of <command>CREATE TYPE</command>
|
|
|
|
creates a composite type.
|
|
|
|
The composite type is specified by a list of attribute names and data types.
|
2011-04-17 23:05:51 +02:00
|
|
|
An attribute's collation can be specified too, if its data type is
|
|
|
|
collatable. A composite type is essentially the same as the row type
|
2003-05-09 00:19:58 +02:00
|
|
|
of a table, but using <command>CREATE TYPE</command> avoids the need to
|
|
|
|
create an actual table when all that is wanted is to define a type.
|
2011-04-17 23:05:51 +02:00
|
|
|
A stand-alone composite type is useful, for example, as the argument or
|
|
|
|
return type of a function.
|
2003-05-09 00:19:58 +02:00
|
|
|
</para>
|
2011-12-19 23:05:19 +01:00
|
|
|
|
|
|
|
<para>
|
|
|
|
To be able to create a composite type, you must
|
|
|
|
have <literal>USAGE</literal> privilege on all attribute types.
|
|
|
|
</para>
|
2003-05-09 00:19:58 +02:00
|
|
|
</refsect2>
|
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refsect2 id="sql-createtype-enum">
|
2007-04-02 05:49:42 +02:00
|
|
|
<title>Enumerated Types</title>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The second form of <command>CREATE TYPE</command> creates an enumerated
|
2017-11-23 15:39:47 +01:00
|
|
|
(enum) type, as described in <xref linkend="datatype-enum"/>.
|
2018-11-13 10:42:43 +01:00
|
|
|
Enum types take a list of quoted labels, each of which
|
2011-11-19 00:23:55 +01:00
|
|
|
must be less than <symbol>NAMEDATALEN</symbol> bytes long (64 bytes in a
|
2018-11-13 10:42:43 +01:00
|
|
|
standard <productname>PostgreSQL</productname> build). (It is possible to
|
|
|
|
create an enumerated type with zero labels, but such a type cannot be used
|
Improve <xref> vs. <command> formatting in the documentation
SQL commands are generally marked up as <command>, except when a link
to a reference page is used using <xref>. But the latter doesn't
create monospace markup, so this looks strange especially when a
paragraph contains a mix of links and non-links.
We considered putting <command> in the <refentrytitle> on the target
side, but that creates some formatting side effects elsewhere.
Generally, it seems safer to solve this on the link source side.
We can't put the <xref> inside the <command>; the DTD doesn't allow
this. DocBook 5 would allow the <command> to have the linkend
attribute itself, but we are not there yet.
So to solve this for now, convert the <xref>s to <link> plus
<command>. This gives the correct look and also gives some more
flexibility what we can put into the link text (e.g., subcommands or
other clauses). In the future, these could then be converted to
DocBook 5 style.
I haven't converted absolutely all xrefs to SQL command reference
pages, only those where we care about the appearance of the link text
or where it was otherwise appropriate to make the appearance match a
bit better. Also in some cases, the links were repetitive, so in
those cases the links were just removed and replaced by a plain
<command>. In cases where we just want the link and don't
specifically care about the generated link text (typically phrased
"for further information see <xref ...>") the xref is kept.
Reported-by: Dagfinn Ilmari Mannsåker <ilmari@ilmari.org>
Discussion: https://www.postgresql.org/message-id/flat/87o8pco34z.fsf@wibble.ilmari.org
2020-10-03 16:16:51 +02:00
|
|
|
to hold values before at least one label is added using <link
|
|
|
|
linkend="sql-altertype"><command>ALTER TYPE</command></link>.)
|
2007-04-02 05:49:42 +02:00
|
|
|
</para>
|
|
|
|
</refsect2>
|
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refsect2 id="sql-createtype-range">
|
2011-11-03 12:16:28 +01:00
|
|
|
<title>Range Types</title>
|
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
2011-11-03 12:16:28 +01:00
|
|
|
The third form of <command>CREATE TYPE</command> creates a new
|
2017-11-23 15:39:47 +01:00
|
|
|
range type, as described in <xref linkend="rangetypes"/>.
|
2011-11-19 00:23:55 +01:00
|
|
|
</para>
|
2011-11-03 12:16:28 +01:00
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
|
|
|
The range type's <replaceable class="parameter">subtype</replaceable> can
|
2011-11-24 01:13:56 +01:00
|
|
|
be any type with an associated b-tree operator class (to determine the
|
2011-11-19 00:23:55 +01:00
|
|
|
ordering of values for the range type). Normally the subtype's default
|
2011-11-24 01:13:56 +01:00
|
|
|
b-tree operator class is used to determine ordering; to use a non-default
|
2013-05-21 03:13:13 +02:00
|
|
|
operator class, specify its name with <replaceable
|
2011-11-19 00:23:55 +01:00
|
|
|
class="parameter">subtype_operator_class</replaceable>. If the subtype is
|
|
|
|
collatable, and you want to use a non-default collation in the range's
|
|
|
|
ordering, specify the desired collation with the <replaceable
|
|
|
|
class="parameter">collation</replaceable> option.
|
|
|
|
</para>
|
2011-11-03 12:16:28 +01:00
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">canonical</replaceable>
|
|
|
|
function must take one argument of the range type being defined, and
|
2011-11-24 01:13:56 +01:00
|
|
|
return a value of the same type. This is used to convert range values
|
|
|
|
to a canonical form, when applicable. See <xref
|
2017-11-23 15:39:47 +01:00
|
|
|
linkend="rangetypes-defining"/> for more information. Creating a
|
2011-11-24 01:13:56 +01:00
|
|
|
<replaceable class="parameter">canonical</replaceable> function
|
|
|
|
is a bit tricky, since it must be defined before the range type can be
|
|
|
|
declared. To do this, you must first create a shell type, which is a
|
2011-11-03 12:16:28 +01:00
|
|
|
placeholder type that has no properties except a name and an
|
|
|
|
owner. This is done by issuing the command <literal>CREATE TYPE
|
2017-10-09 03:44:17 +02:00
|
|
|
<replaceable>name</replaceable></literal>, with no additional parameters. Then
|
2011-11-24 01:13:56 +01:00
|
|
|
the function can be declared using the shell type as argument and result,
|
|
|
|
and finally the range type can be declared using the same name. This
|
|
|
|
automatically replaces the shell type entry with a valid range type.
|
2011-11-19 00:23:55 +01:00
|
|
|
</para>
|
2011-11-03 12:16:28 +01:00
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">subtype_diff</replaceable>
|
|
|
|
function must take two values of the
|
|
|
|
<replaceable class="parameter">subtype</replaceable> type as arguments,
|
|
|
|
and return a <type>double precision</type> value representing the
|
|
|
|
difference between the two given values. While this is optional,
|
|
|
|
providing it allows much greater efficiency of GiST indexes on columns of
|
2017-11-23 15:39:47 +01:00
|
|
|
the range type. See <xref linkend="rangetypes-defining"/> for more
|
2011-11-24 01:13:56 +01:00
|
|
|
information.
|
2011-11-19 00:23:55 +01:00
|
|
|
</para>
|
Multirange datatypes
Multiranges are basically sorted arrays of non-overlapping ranges with
set-theoretic operations defined over them.
Since v14, each range type automatically gets a corresponding multirange
datatype. There are both manual and automatic mechanisms for naming multirange
types. One can specify the multirange type name using the multirange_type_name
attribute in CREATE TYPE. Otherwise, a multirange type name is generated
automatically. If the range type name contains "range" then we change that to
"multirange". Otherwise, we add "_multirange" to the end.
Implementation of multiranges comes with a space-efficient internal
representation format, which evades extra paddings and duplicated storage of
oids. Altogether this format allows fetching a particular range by its index
in O(n).
Statistic gathering and selectivity estimation are implemented for multiranges.
For this purpose, stored multirange is approximated as union range without gaps.
This field will likely need improvements in the future.
Catversion is bumped.
Discussion: https://postgr.es/m/CALNJ-vSUpQ_Y%3DjXvTxt1VYFztaBSsWVXeF1y6gTYQ4bOiWDLgQ%40mail.gmail.com
Discussion: https://postgr.es/m/a0b8026459d1e6167933be2104a6174e7d40d0ab.camel%40j-davis.com#fe7218c83b08068bfffb0c5293eceda0
Author: Paul Jungwirth, revised by me
Reviewed-by: David Fetter, Corey Huinker, Jeff Davis, Pavel Stehule
Reviewed-by: Alvaro Herrera, Tom Lane, Isaac Morland, David G. Johnston
Reviewed-by: Zhihong Yu, Alexander Korotkov
2020-12-20 05:20:33 +01:00
|
|
|
|
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">multirange_type_name</replaceable>
|
|
|
|
parameter specifies the name of the corresponding multirange type. If not
|
|
|
|
specified, this name is chosen automatically as follows.
|
2020-12-24 09:05:49 +01:00
|
|
|
If the range type name contains the substring <literal>range</literal>, then
|
|
|
|
the multirange type name is formed by replacing the <literal>range</literal>
|
|
|
|
substring with <literal>multirange</literal> in the range
|
|
|
|
type name. Otherwise, the multirange type name is formed by appending a
|
Multirange datatypes
Multiranges are basically sorted arrays of non-overlapping ranges with
set-theoretic operations defined over them.
Since v14, each range type automatically gets a corresponding multirange
datatype. There are both manual and automatic mechanisms for naming multirange
types. One can specify the multirange type name using the multirange_type_name
attribute in CREATE TYPE. Otherwise, a multirange type name is generated
automatically. If the range type name contains "range" then we change that to
"multirange". Otherwise, we add "_multirange" to the end.
Implementation of multiranges comes with a space-efficient internal
representation format, which evades extra paddings and duplicated storage of
oids. Altogether this format allows fetching a particular range by its index
in O(n).
Statistic gathering and selectivity estimation are implemented for multiranges.
For this purpose, stored multirange is approximated as union range without gaps.
This field will likely need improvements in the future.
Catversion is bumped.
Discussion: https://postgr.es/m/CALNJ-vSUpQ_Y%3DjXvTxt1VYFztaBSsWVXeF1y6gTYQ4bOiWDLgQ%40mail.gmail.com
Discussion: https://postgr.es/m/a0b8026459d1e6167933be2104a6174e7d40d0ab.camel%40j-davis.com#fe7218c83b08068bfffb0c5293eceda0
Author: Paul Jungwirth, revised by me
Reviewed-by: David Fetter, Corey Huinker, Jeff Davis, Pavel Stehule
Reviewed-by: Alvaro Herrera, Tom Lane, Isaac Morland, David G. Johnston
Reviewed-by: Zhihong Yu, Alexander Korotkov
2020-12-20 05:20:33 +01:00
|
|
|
<literal>_multirange</literal> suffix to the range type name.
|
|
|
|
</para>
|
2011-11-03 12:16:28 +01:00
|
|
|
</refsect2>
|
|
|
|
|
2002-08-29 02:17:06 +02:00
|
|
|
<refsect2>
|
|
|
|
<title>Base Types</title>
|
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<para>
|
2011-11-03 12:16:28 +01:00
|
|
|
The fourth form of <command>CREATE TYPE</command> creates a new base type
|
2008-07-31 18:27:16 +02:00
|
|
|
(scalar type). To create a new base type, you must be a superuser.
|
|
|
|
(This restriction is made because an erroneous type definition could
|
|
|
|
confuse or even crash the server.)
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The parameters can appear in any order, not only that
|
2003-05-09 00:19:58 +02:00
|
|
|
illustrated above, and most are optional. You must register
|
|
|
|
two or more functions (using <command>CREATE FUNCTION</command>) before
|
2006-12-30 22:21:56 +01:00
|
|
|
defining the type. The support functions
|
2003-05-09 00:19:58 +02:00
|
|
|
<replaceable class="parameter">input_function</replaceable> and
|
|
|
|
<replaceable class="parameter">output_function</replaceable>
|
|
|
|
are required, while the functions
|
2004-02-13 00:41:04 +01:00
|
|
|
<replaceable class="parameter">receive_function</replaceable>,
|
2006-12-30 22:21:56 +01:00
|
|
|
<replaceable class="parameter">send_function</replaceable>,
|
|
|
|
<replaceable class="parameter">type_modifier_input_function</replaceable>,
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<replaceable class="parameter">type_modifier_output_function</replaceable>,
|
|
|
|
<replaceable class="parameter">analyze_function</replaceable>, and
|
|
|
|
<replaceable class="parameter">subscript_function</replaceable>
|
2003-05-09 00:19:58 +02:00
|
|
|
are optional. Generally these functions have to be coded in C
|
|
|
|
or another low-level language.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The <replaceable class="parameter">input_function</replaceable>
|
|
|
|
converts the type's external textual representation to the internal
|
2006-04-04 21:35:37 +02:00
|
|
|
representation used by the operators and functions defined for the type.
|
1998-07-29 08:23:26 +02:00
|
|
|
<replaceable class="parameter">output_function</replaceable>
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
performs the reverse transformation. The input function can be
|
2002-08-22 02:01:51 +02:00
|
|
|
declared as taking one argument of type <type>cstring</type>,
|
2001-11-03 22:42:47 +01:00
|
|
|
or as taking three arguments of types
|
2003-04-22 12:08:08 +02:00
|
|
|
<type>cstring</type>, <type>oid</type>, <type>integer</type>.
|
|
|
|
The first argument is the input text as a C string, the second
|
2005-08-12 23:49:47 +02:00
|
|
|
argument is the type's own OID (except for array types, which instead
|
|
|
|
receive their element type's OID),
|
2017-10-09 03:44:17 +02:00
|
|
|
and the third is the <literal>typmod</literal> of the destination column, if known
|
2005-01-04 01:39:53 +01:00
|
|
|
(-1 will be passed if not).
|
2005-05-01 20:56:19 +02:00
|
|
|
The input function must return a value of the data type itself.
|
2006-04-04 21:35:37 +02:00
|
|
|
Usually, an input function should be declared STRICT; if it is not,
|
|
|
|
it will be called with a NULL first parameter when reading a NULL
|
|
|
|
input value. The function must still return NULL in this case, unless
|
|
|
|
it raises an error.
|
|
|
|
(This case is mainly meant to support domain input functions, which
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
might need to reject NULL inputs.)
|
2005-05-01 20:56:19 +02:00
|
|
|
The output function must be
|
|
|
|
declared as taking one argument of the new data type.
|
|
|
|
The output function must return type <type>cstring</type>.
|
2006-04-04 21:35:37 +02:00
|
|
|
Output functions are not invoked for NULL values.
|
1998-07-29 08:23:26 +02:00
|
|
|
</para>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
2003-05-09 00:19:58 +02:00
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">receive_function</replaceable>
|
|
|
|
converts the type's external binary representation to the internal
|
|
|
|
representation. If this function is not supplied, the type cannot
|
|
|
|
participate in binary input. The binary representation should be
|
|
|
|
chosen to be cheap to convert to internal form, while being reasonably
|
2003-11-01 02:56:29 +01:00
|
|
|
portable. (For example, the standard integer data types use network
|
2003-05-09 00:19:58 +02:00
|
|
|
byte order as the external binary representation, while the internal
|
|
|
|
representation is in the machine's native byte order.) The receive
|
|
|
|
function should perform adequate checking to ensure that the value is
|
|
|
|
valid.
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
The receive function can be declared as taking one argument of type
|
2005-07-10 23:14:00 +02:00
|
|
|
<type>internal</type>, or as taking three arguments of types
|
|
|
|
<type>internal</type>, <type>oid</type>, <type>integer</type>.
|
|
|
|
The first argument is a pointer to a <type>StringInfo</type> buffer
|
|
|
|
holding the received byte string; the optional arguments are the
|
|
|
|
same as for the text input function.
|
|
|
|
The receive function must return a value of the data type itself.
|
2006-04-04 21:35:37 +02:00
|
|
|
Usually, a receive function should be declared STRICT; if it is not,
|
|
|
|
it will be called with a NULL first parameter when reading a NULL
|
|
|
|
input value. The function must still return NULL in this case, unless
|
|
|
|
it raises an error.
|
|
|
|
(This case is mainly meant to support domain receive functions, which
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
might need to reject NULL inputs.)
|
2005-07-10 23:14:00 +02:00
|
|
|
Similarly, the optional
|
2003-05-09 00:19:58 +02:00
|
|
|
<replaceable class="parameter">send_function</replaceable> converts
|
|
|
|
from the internal representation to the external binary representation.
|
|
|
|
If this function is not supplied, the type cannot participate in binary
|
2005-05-01 20:56:19 +02:00
|
|
|
output. The send function must be
|
|
|
|
declared as taking one argument of the new data type.
|
2003-05-10 01:01:45 +02:00
|
|
|
The send function must return type <type>bytea</type>.
|
2006-04-04 21:35:37 +02:00
|
|
|
Send functions are not invoked for NULL values.
|
2003-05-09 00:19:58 +02:00
|
|
|
</para>
|
|
|
|
|
2002-08-22 02:01:51 +02:00
|
|
|
<para>
|
|
|
|
You should at this point be wondering how the input and output functions
|
2006-02-28 23:37:27 +01:00
|
|
|
can be declared to have results or arguments of the new type, when they
|
|
|
|
have to be created before the new type can be created. The answer is that
|
2017-10-09 03:44:17 +02:00
|
|
|
the type should first be defined as a <firstterm>shell type</firstterm>, which is a
|
2006-02-28 23:37:27 +01:00
|
|
|
placeholder type that has no properties except a name and an owner. This
|
|
|
|
is done by issuing the command <literal>CREATE TYPE
|
2017-10-09 03:44:17 +02:00
|
|
|
<replaceable>name</replaceable></literal>, with no additional parameters. Then the
|
2015-03-20 23:48:52 +01:00
|
|
|
C I/O functions can be defined referencing the shell type. Finally,
|
2017-10-09 03:44:17 +02:00
|
|
|
<command>CREATE TYPE</command> with a full definition replaces the shell entry
|
2006-02-28 23:37:27 +01:00
|
|
|
with a complete, valid type definition, after which the new type can be
|
|
|
|
used normally.
|
2002-08-22 02:01:51 +02:00
|
|
|
</para>
|
|
|
|
|
2006-12-30 22:21:56 +01:00
|
|
|
<para>
|
|
|
|
The optional
|
|
|
|
<replaceable class="parameter">type_modifier_input_function</replaceable>
|
|
|
|
and <replaceable class="parameter">type_modifier_output_function</replaceable>
|
|
|
|
are needed if the type supports modifiers, that is, optional constraints
|
2017-10-09 03:44:17 +02:00
|
|
|
attached to a type declaration, such as <literal>char(5)</literal> or
|
|
|
|
<literal>numeric(30,2)</literal>. <productname>PostgreSQL</productname> allows
|
2007-06-15 22:56:52 +02:00
|
|
|
user-defined types to take one or more simple constants or identifiers as
|
2008-05-27 20:05:13 +02:00
|
|
|
modifiers. However, this information must be capable of being packed into a
|
2007-06-15 22:56:52 +02:00
|
|
|
single non-negative integer value for storage in the system catalogs. The
|
2006-12-30 22:21:56 +01:00
|
|
|
<replaceable class="parameter">type_modifier_input_function</replaceable>
|
2017-10-09 03:44:17 +02:00
|
|
|
is passed the declared modifier(s) in the form of a <type>cstring</type>
|
2006-12-30 22:21:56 +01:00
|
|
|
array. It must check the values for validity (throwing an error if they
|
|
|
|
are wrong), and if they are correct, return a single non-negative
|
2017-10-09 03:44:17 +02:00
|
|
|
<type>integer</type> value that will be stored as the column <quote>typmod</quote>.
|
2006-12-30 22:21:56 +01:00
|
|
|
Type modifiers will be rejected if the type does not have a
|
|
|
|
<replaceable class="parameter">type_modifier_input_function</replaceable>.
|
|
|
|
The <replaceable class="parameter">type_modifier_output_function</replaceable>
|
|
|
|
converts the internal integer typmod value back to the correct form for
|
2017-10-09 03:44:17 +02:00
|
|
|
user display. It must return a <type>cstring</type> value that is the exact
|
|
|
|
string to append to the type name; for example <type>numeric</type>'s
|
|
|
|
function might return <literal>(30,2)</literal>.
|
2006-12-30 22:21:56 +01:00
|
|
|
It is permissible to omit the
|
|
|
|
<replaceable class="parameter">type_modifier_output_function</replaceable>,
|
2008-05-27 20:05:13 +02:00
|
|
|
in which case the default display format is just the stored typmod integer
|
|
|
|
value enclosed in parentheses.
|
2006-12-30 22:21:56 +01:00
|
|
|
</para>
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">analyze_function</replaceable>
|
|
|
|
performs type-specific statistics collection for columns of the data type.
|
2017-10-09 03:44:17 +02:00
|
|
|
By default, <command>ANALYZE</command> will attempt to gather statistics using
|
|
|
|
the type's <quote>equals</quote> and <quote>less-than</quote> operators, if there
|
2004-02-13 00:41:04 +01:00
|
|
|
is a default b-tree operator class for the type. For non-scalar types
|
|
|
|
this behavior is likely to be unsuitable, so it can be overridden by
|
|
|
|
specifying a custom analysis function. The analysis function must be
|
2017-10-09 03:44:17 +02:00
|
|
|
declared to take a single argument of type <type>internal</type>, and return
|
|
|
|
a <type>boolean</type> result. The detailed API for analysis functions appears
|
|
|
|
in <filename>src/include/commands/vacuum.h</filename>.
|
2004-02-13 00:41:04 +01:00
|
|
|
</para>
|
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<para>
|
|
|
|
The optional <replaceable class="parameter">subscript_function</replaceable>
|
|
|
|
allows the data type to be subscripted in SQL commands. Specifying this
|
|
|
|
function does not cause the type to be considered a <quote>true</quote>
|
|
|
|
array type; for example, it will not be a candidate for the result type
|
|
|
|
of <literal>ARRAY[]</literal> constructs. But if subscripting a value
|
|
|
|
of the type is a natural notation for extracting data from it, then
|
|
|
|
a <replaceable class="parameter">subscript_function</replaceable> can
|
|
|
|
be written to define what that means. The subscript function must be
|
|
|
|
declared to take a single argument of type <type>internal</type>, and
|
|
|
|
return an <type>internal</type> result, which is a pointer to a struct
|
|
|
|
of methods (functions) that implement subscripting.
|
|
|
|
The detailed API for subscript functions appears
|
2020-12-12 00:58:07 +01:00
|
|
|
in <filename>src/include/nodes/subscripting.h</filename>.
|
|
|
|
It might also be useful to read the array implementation
|
|
|
|
in <filename>src/backend/utils/adt/arraysubs.c</filename>,
|
|
|
|
or the simpler code
|
|
|
|
in <filename>contrib/hstore/hstore_subs.c</filename>.
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
Additional information appears in
|
|
|
|
<xref linkend="sql-createtype-array"/> below.
|
|
|
|
</para>
|
|
|
|
|
2002-08-22 02:01:51 +02:00
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
While the details of the new type's internal representation are only
|
|
|
|
known to the I/O functions and other functions you create to work with
|
|
|
|
the type, there are several properties of the internal representation
|
|
|
|
that must be declared to <productname>PostgreSQL</productname>.
|
|
|
|
Foremost of these is
|
|
|
|
<replaceable class="parameter">internallength</replaceable>.
|
2003-04-22 12:08:08 +02:00
|
|
|
Base data types can be fixed-length, in which case
|
1998-07-29 08:23:26 +02:00
|
|
|
<replaceable class="parameter">internallength</replaceable> is a
|
2015-10-08 18:29:25 +02:00
|
|
|
positive integer, or variable-length, indicated by setting
|
1998-07-29 08:23:26 +02:00
|
|
|
<replaceable class="parameter">internallength</replaceable>
|
2003-04-22 12:08:08 +02:00
|
|
|
to <literal>VARIABLE</literal>. (Internally, this is represented
|
2017-10-09 03:44:17 +02:00
|
|
|
by setting <literal>typlen</literal> to -1.) The internal representation of all
|
2003-04-22 12:08:08 +02:00
|
|
|
variable-length types must start with a 4-byte integer giving the total
|
2015-02-19 04:33:39 +01:00
|
|
|
length of this value of the type. (Note that the length field is often
|
2017-11-23 15:39:47 +01:00
|
|
|
encoded, as described in <xref linkend="storage-toast"/>; it's unwise
|
2015-02-19 04:33:39 +01:00
|
|
|
to access it directly.)
|
2001-11-03 22:42:47 +01:00
|
|
|
</para>
|
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
The optional flag <literal>PASSEDBYVALUE</literal> indicates that
|
2003-05-09 00:19:58 +02:00
|
|
|
values of this data type are passed by value, rather than by
|
2015-02-19 04:33:39 +01:00
|
|
|
reference. Types passed by value must be fixed-length, and their internal
|
2017-10-09 03:44:17 +02:00
|
|
|
representation cannot be larger than the size of the <type>Datum</type> type
|
2015-02-19 04:33:39 +01:00
|
|
|
(4 bytes on some machines, 8 bytes on others).
|
2001-11-03 22:42:47 +01:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
The <replaceable class="parameter">alignment</replaceable> parameter
|
2002-01-20 23:19:57 +01:00
|
|
|
specifies the storage alignment required for the data type. The
|
2001-11-03 22:42:47 +01:00
|
|
|
allowed values equate to alignment on 1, 2, 4, or 8 byte boundaries.
|
|
|
|
Note that variable-length types must have an alignment of at least
|
2017-10-09 03:44:17 +02:00
|
|
|
4, since they necessarily contain an <type>int4</type> as their first component.
|
1998-07-29 08:23:26 +02:00
|
|
|
</para>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
2000-08-25 01:36:29 +02:00
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
The <replaceable class="parameter">storage</replaceable> parameter
|
|
|
|
allows selection of storage strategies for variable-length data
|
|
|
|
types. (Only <literal>plain</literal> is allowed for fixed-length
|
|
|
|
types.) <literal>plain</literal> specifies that data of the type
|
|
|
|
will always be stored in-line and not compressed.
|
|
|
|
<literal>extended</literal> specifies that the system will first
|
|
|
|
try to compress a long data value, and will move the value out of
|
2000-08-25 01:36:29 +02:00
|
|
|
the main table row if it's still too long.
|
2003-04-22 12:08:08 +02:00
|
|
|
<literal>external</literal> allows the value to be moved out of the
|
|
|
|
main table, but the system will not try to compress it.
|
|
|
|
<literal>main</literal> allows compression, but discourages moving
|
|
|
|
the value out of the main table. (Data items with this storage
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
strategy might still be moved out of the main table if there is no
|
2003-04-22 12:08:08 +02:00
|
|
|
other way to make a row fit, but they will be kept in the main
|
|
|
|
table preferentially over <literal>extended</literal> and
|
|
|
|
<literal>external</literal> items.)
|
2000-08-25 01:36:29 +02:00
|
|
|
</para>
|
2002-08-29 02:17:06 +02:00
|
|
|
|
2015-02-19 04:33:39 +01:00
|
|
|
<para>
|
|
|
|
All <replaceable class="parameter">storage</replaceable> values other
|
|
|
|
than <literal>plain</literal> imply that the functions of the data type
|
2017-10-09 03:44:17 +02:00
|
|
|
can handle values that have been <firstterm>toasted</firstterm>, as described
|
2017-11-23 15:39:47 +01:00
|
|
|
in <xref linkend="storage-toast"/> and <xref linkend="xtypes-toast"/>.
|
2015-02-19 04:33:39 +01:00
|
|
|
The specific other value given merely determines the default TOAST
|
|
|
|
storage strategy for columns of a toastable data type; users can pick
|
|
|
|
other strategies for individual columns using <literal>ALTER TABLE
|
2017-10-09 03:44:17 +02:00
|
|
|
SET STORAGE</literal>.
|
2015-02-19 04:33:39 +01:00
|
|
|
</para>
|
|
|
|
|
2008-11-30 20:01:29 +01:00
|
|
|
<para>
|
|
|
|
The <replaceable class="parameter">like_type</replaceable> parameter
|
|
|
|
provides an alternative method for specifying the basic representation
|
|
|
|
properties of a data type: copy them from some existing type. The values of
|
|
|
|
<replaceable class="parameter">internallength</replaceable>,
|
|
|
|
<replaceable class="parameter">passedbyvalue</replaceable>,
|
|
|
|
<replaceable class="parameter">alignment</replaceable>, and
|
|
|
|
<replaceable class="parameter">storage</replaceable> are copied from the
|
|
|
|
named type. (It is possible, though usually undesirable, to override
|
2017-10-09 03:44:17 +02:00
|
|
|
some of these values by specifying them along with the <literal>LIKE</literal>
|
2008-11-30 20:01:29 +01:00
|
|
|
clause.) Specifying representation this way is especially useful when
|
2017-10-09 03:44:17 +02:00
|
|
|
the low-level implementation of the new type <quote>piggybacks</quote> on an
|
2008-11-30 20:01:29 +01:00
|
|
|
existing type in some fashion.
|
|
|
|
</para>
|
|
|
|
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
<para>
|
|
|
|
The <replaceable class="parameter">category</replaceable> and
|
|
|
|
<replaceable class="parameter">preferred</replaceable> parameters can be
|
|
|
|
used to help control which implicit cast will be applied in ambiguous
|
|
|
|
situations. Each data type belongs to a category named by a single ASCII
|
2017-10-09 03:44:17 +02:00
|
|
|
character, and each type is either <quote>preferred</quote> or not within its
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
category. The parser will prefer casting to preferred types (but only from
|
|
|
|
other types within the same category) when this rule is helpful in
|
|
|
|
resolving overloaded functions or operators. For more details see <xref
|
2017-11-23 15:39:47 +01:00
|
|
|
linkend="typeconv"/>. For types that have no implicit casts to or from any
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
other types, it is sufficient to leave these settings at the defaults.
|
|
|
|
However, for a group of related types that have implicit casts, it is often
|
|
|
|
helpful to mark them all as belonging to a category and select one or two
|
2017-10-09 03:44:17 +02:00
|
|
|
of the <quote>most general</quote> types as being preferred within the category.
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
The <replaceable class="parameter">category</replaceable> parameter is
|
|
|
|
especially useful when adding a user-defined type to an existing built-in
|
|
|
|
category, such as the numeric or string types. However, it is also
|
|
|
|
possible to create new entirely-user-defined type categories. Select any
|
|
|
|
ASCII character other than an upper-case letter to name such a category.
|
|
|
|
</para>
|
|
|
|
|
2003-05-09 00:19:58 +02:00
|
|
|
<para>
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
A default value can be specified, in case a user wants columns of the
|
2003-05-09 00:19:58 +02:00
|
|
|
data type to default to something other than the null value.
|
|
|
|
Specify the default with the <literal>DEFAULT</literal> key word.
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
(Such a default can be overridden by an explicit <literal>DEFAULT</literal>
|
2003-05-09 00:19:58 +02:00
|
|
|
clause attached to a particular column.)
|
|
|
|
</para>
|
2000-08-25 01:36:29 +02:00
|
|
|
|
2002-08-15 18:36:08 +02:00
|
|
|
<para>
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
To indicate that a type is a fixed-length array type,
|
|
|
|
specify the type of the array
|
2017-10-09 03:44:17 +02:00
|
|
|
elements using the <literal>ELEMENT</literal> key word. For example, to
|
2003-05-09 00:19:58 +02:00
|
|
|
define an array of 4-byte integers (<type>int4</type>), specify
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<literal>ELEMENT = int4</literal>. For more details,
|
|
|
|
see <xref linkend="sql-createtype-array"/> below.
|
2003-05-09 00:19:58 +02:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
To indicate the delimiter to be used between values in the external
|
|
|
|
representation of arrays of this type, <replaceable
|
|
|
|
class="parameter">delimiter</replaceable> can be
|
|
|
|
set to a specific character. The default delimiter is the comma
|
|
|
|
(<literal>,</literal>). Note that the delimiter is associated
|
|
|
|
with the array element type, not the array type itself.
|
2002-08-15 18:36:08 +02:00
|
|
|
</para>
|
2003-05-09 00:19:58 +02:00
|
|
|
|
2011-02-08 22:04:18 +01:00
|
|
|
<para>
|
2011-05-19 00:14:45 +02:00
|
|
|
If the optional Boolean
|
2011-02-08 22:04:18 +01:00
|
|
|
parameter <replaceable class="parameter">collatable</replaceable>
|
|
|
|
is true, column definitions and expressions of the type can carry
|
2011-04-22 23:43:18 +02:00
|
|
|
collation information through use of
|
2011-02-08 22:04:18 +01:00
|
|
|
the <literal>COLLATE</literal> clause. It is up to the
|
|
|
|
implementations of the functions operating on the type to actually
|
|
|
|
make use of the collation information; this does not happen
|
|
|
|
automatically merely by marking the type collatable.
|
|
|
|
</para>
|
2002-08-29 02:17:06 +02:00
|
|
|
</refsect2>
|
2002-08-15 18:36:08 +02:00
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<refsect2 id="sql-createtype-array" xreflabel="Array Types">
|
1998-07-29 08:23:26 +02:00
|
|
|
<title>Array Types</title>
|
2001-11-03 22:42:47 +01:00
|
|
|
|
1998-07-29 08:23:26 +02:00
|
|
|
<para>
|
Support arrays of composite types, including the rowtypes of regular tables
and views (but not system catalogs, nor sequences or toast tables). Get rid
of the hardwired convention that a type's array type is named exactly "_type",
instead using a new column pg_type.typarray to provide the linkage. (It still
will be named "_type", though, except in odd corner cases such as
maximum-length type names.)
Along the way, make tracking of owner and schema dependencies for types more
uniform: a type directly created by the user has these dependencies, while a
table rowtype or auto-generated array type does not have them, but depends on
its parent object instead.
David Fetter, Andrew Dunstan, Tom Lane
2007-05-11 19:57:14 +02:00
|
|
|
Whenever a user-defined type is created,
|
2001-11-03 22:42:47 +01:00
|
|
|
<productname>PostgreSQL</productname> automatically creates an
|
2011-11-19 00:23:55 +01:00
|
|
|
associated array type, whose name consists of the element type's
|
Support arrays of composite types, including the rowtypes of regular tables
and views (but not system catalogs, nor sequences or toast tables). Get rid
of the hardwired convention that a type's array type is named exactly "_type",
instead using a new column pg_type.typarray to provide the linkage. (It still
will be named "_type", though, except in odd corner cases such as
maximum-length type names.)
Along the way, make tracking of owner and schema dependencies for types more
uniform: a type directly created by the user has these dependencies, while a
table rowtype or auto-generated array type does not have them, but depends on
its parent object instead.
David Fetter, Andrew Dunstan, Tom Lane
2007-05-11 19:57:14 +02:00
|
|
|
name prepended with an underscore, and truncated if necessary to keep
|
|
|
|
it less than <symbol>NAMEDATALEN</symbol> bytes long. (If the name
|
|
|
|
so generated collides with an existing type name, the process is
|
|
|
|
repeated until a non-colliding name is found.)
|
|
|
|
This implicitly-created array type is variable length and uses the
|
2017-10-09 03:44:17 +02:00
|
|
|
built-in input and output functions <literal>array_in</literal> and
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<literal>array_out</literal>. Furthermore, this type is what the system
|
|
|
|
uses for constructs such as <literal>ARRAY[]</literal> over the
|
|
|
|
user-defined type. The array type tracks any changes in its
|
Support arrays of composite types, including the rowtypes of regular tables
and views (but not system catalogs, nor sequences or toast tables). Get rid
of the hardwired convention that a type's array type is named exactly "_type",
instead using a new column pg_type.typarray to provide the linkage. (It still
will be named "_type", though, except in odd corner cases such as
maximum-length type names.)
Along the way, make tracking of owner and schema dependencies for types more
uniform: a type directly created by the user has these dependencies, while a
table rowtype or auto-generated array type does not have them, but depends on
its parent object instead.
David Fetter, Andrew Dunstan, Tom Lane
2007-05-11 19:57:14 +02:00
|
|
|
element type's owner or schema, and is dropped if the element type is.
|
2001-11-03 22:42:47 +01:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
2017-10-09 03:44:17 +02:00
|
|
|
You might reasonably ask why there is an <option>ELEMENT</option>
|
2003-04-22 12:08:08 +02:00
|
|
|
option, if the system makes the correct array type automatically.
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
The main case where it's useful to use <option>ELEMENT</option> is when you are
|
2003-04-22 12:08:08 +02:00
|
|
|
making a fixed-length type that happens to be internally an array of a number of
|
|
|
|
identical things, and you want to allow these things to be accessed
|
2001-11-03 22:42:47 +01:00
|
|
|
directly by subscripting, in addition to whatever operations you plan
|
2017-10-09 03:44:17 +02:00
|
|
|
to provide for the type as a whole. For example, type <type>point</type>
|
2015-02-19 04:33:39 +01:00
|
|
|
is represented as just two floating-point numbers, which can be accessed
|
2017-10-09 03:44:17 +02:00
|
|
|
using <literal>point[0]</literal> and <literal>point[1]</literal>.
|
2001-11-03 22:42:47 +01:00
|
|
|
Note that
|
|
|
|
this facility only works for fixed-length types whose internal form
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
is exactly a sequence of identical fixed-length fields.
|
2001-11-03 22:42:47 +01:00
|
|
|
For historical reasons (i.e., this is clearly wrong but it's far too
|
|
|
|
late to change it), subscripting of fixed-length array types starts from
|
|
|
|
zero, rather than from one as for variable-length arrays.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
|
|
|
|
<para>
|
|
|
|
Specifying the <option>SUBSCRIPT</option> option allows a data type to
|
|
|
|
be subscripted, even though the system does not otherwise regard it as
|
|
|
|
an array type. The behavior just described for fixed-length arrays is
|
|
|
|
actually implemented by the <option>SUBSCRIPT</option> handler
|
|
|
|
function <function>raw_array_subscript_handler</function>, which is
|
|
|
|
used automatically if you specify <option>ELEMENT</option> for a
|
|
|
|
fixed-length type without also writing <option>SUBSCRIPT</option>.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
When specifying a custom <option>SUBSCRIPT</option> function, it is
|
|
|
|
not necessary to specify <option>ELEMENT</option> unless
|
|
|
|
the <option>SUBSCRIPT</option> handler function needs to
|
|
|
|
consult <structfield>typelem</structfield> to find out what to return.
|
|
|
|
Be aware that specifying <option>ELEMENT</option> causes the system to
|
|
|
|
assume that the new type contains, or is somehow physically dependent on,
|
|
|
|
the element type; thus for example changing properties of the element
|
|
|
|
type won't be allowed if there are any columns of the dependent type.
|
|
|
|
</para>
|
1998-07-29 08:23:26 +02:00
|
|
|
</refsect2>
|
|
|
|
</refsect1>
|
2006-12-30 22:21:56 +01:00
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<refsect1>
|
2003-05-09 00:19:58 +02:00
|
|
|
<title>Parameters</title>
|
2003-04-22 12:08:08 +02:00
|
|
|
|
|
|
|
<variablelist>
|
|
|
|
<varlistentry>
|
2003-09-22 02:16:58 +02:00
|
|
|
<term><replaceable class="parameter">name</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name (optionally schema-qualified) of a type to be created.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">attribute_name</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The name of an attribute (column) for the composite type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">data_type</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of an existing data type to become a column of the
|
|
|
|
composite type.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">collation</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of an existing collation to be associated with a column of
|
|
|
|
a composite type, or with a range type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2007-04-02 05:49:42 +02:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">label</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
A string literal representing the textual label associated with
|
|
|
|
one value of an enum type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">subtype</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of the element type that the range type will represent ranges
|
|
|
|
of.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">subtype_operator_class</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2011-11-24 01:13:56 +01:00
|
|
|
The name of a b-tree operator class for the subtype.
|
2011-11-19 00:23:55 +01:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">canonical_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of the canonicalization function for the range type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">subtype_diff_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a difference function for the subtype.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
Multirange datatypes
Multiranges are basically sorted arrays of non-overlapping ranges with
set-theoretic operations defined over them.
Since v14, each range type automatically gets a corresponding multirange
datatype. There are both manual and automatic mechanisms for naming multirange
types. One can specify the multirange type name using the multirange_type_name
attribute in CREATE TYPE. Otherwise, a multirange type name is generated
automatically. If the range type name contains "range" then we change that to
"multirange". Otherwise, we add "_multirange" to the end.
Implementation of multiranges comes with a space-efficient internal
representation format, which avoids extra padding and duplicated storage of
oids. Altogether this format allows fetching a particular range by its index
in O(n).
Statistic gathering and selectivity estimation are implemented for multiranges.
For this purpose, a stored multirange is approximated as a union range without gaps.
This field will likely need improvements in the future.
Catversion is bumped.
Discussion: https://postgr.es/m/CALNJ-vSUpQ_Y%3DjXvTxt1VYFztaBSsWVXeF1y6gTYQ4bOiWDLgQ%40mail.gmail.com
Discussion: https://postgr.es/m/a0b8026459d1e6167933be2104a6174e7d40d0ab.camel%40j-davis.com#fe7218c83b08068bfffb0c5293eceda0
Author: Paul Jungwirth, revised by me
Reviewed-by: David Fetter, Corey Huinker, Jeff Davis, Pavel Stehule
Reviewed-by: Alvaro Herrera, Tom Lane, Isaac Morland, David G. Johnston
Reviewed-by: Zhihong Yu, Alexander Korotkov
2020-12-20 05:20:33 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">multirange_type_name</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of the corresponding multirange type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">input_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a function that converts data from the type's
|
2003-05-09 00:19:58 +02:00
|
|
|
external textual form to its internal form.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">output_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a function that converts data from the type's
|
2003-05-09 00:19:58 +02:00
|
|
|
internal form to its external textual form.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">receive_function</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The name of a function that converts data from the type's
|
|
|
|
external binary form to its internal form.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">send_function</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The name of a function that converts data from the type's
|
|
|
|
internal form to its external binary form.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2006-12-30 22:21:56 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">type_modifier_input_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2008-05-27 20:05:13 +02:00
|
|
|
The name of a function that converts an array of modifier(s) for the type
|
2006-12-30 22:21:56 +01:00
|
|
|
into internal form.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">type_modifier_output_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a function that converts the internal form of the type's
|
|
|
|
modifier(s) to external textual form.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2004-02-13 00:41:04 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">analyze_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a function that performs statistical analysis for the
|
|
|
|
data type.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
Support subscripting of arbitrary types, not only arrays.
This patch generalizes the subscripting infrastructure so that any
data type can be subscripted, if it provides a handler function to
define what that means. Traditional variable-length (varlena) arrays
all use array_subscript_handler(), while the existing fixed-length
types that support subscripting use raw_array_subscript_handler().
It's expected that other types that want to use subscripting notation
will define their own handlers. (This patch provides no such new
features, though; it only lays the foundation for them.)
To do this, move the parser's semantic processing of subscripts
(including coercion to whatever data type is required) into a
method callback supplied by the handler. On the execution side,
replace the ExecEvalSubscriptingRef* layer of functions with direct
calls to callback-supplied execution routines. (Thus, essentially
no new run-time overhead should be caused by this patch. Indeed,
there is room to remove some overhead by supplying specialized
execution routines. This patch does a little bit in that line,
but more could be done.)
Additional work is required here and there to remove formerly
hard-wired assumptions about the result type, collation, etc
of a SubscriptingRef expression node; and to remove assumptions
that the subscript values must be integers.
One useful side-effect of this is that we now have a less squishy
mechanism for identifying whether a data type is a "true" array:
instead of wiring in weird rules about typlen, we can look to see
if pg_type.typsubscript == F_ARRAY_SUBSCRIPT_HANDLER. For this
to be bulletproof, we have to forbid user-defined types from using
that handler directly; but there seems no good reason for them to
do so.
This patch also removes assumptions that the number of subscripts
is limited to MAXDIM (6), or indeed has any hard-wired limit.
That limit still applies to types handled by array_subscript_handler
or raw_array_subscript_handler, but to discourage other dependencies
on this constant, I've moved it from c.h to utils/array.h.
Dmitry Dolgov, reviewed at various times by Tom Lane, Arthur Zakirov,
Peter Eisentraut, Pavel Stehule
Discussion: https://postgr.es/m/CA+q6zcVDuGBv=M0FqBYX8DPebS3F_0KQ6OVFobGJPM507_SZ_w@mail.gmail.com
Discussion: https://postgr.es/m/CA+q6zcVovR+XY4mfk-7oNk-rF91gH0PebnNfuUjuuDsyHjOcVA@mail.gmail.com
2020-12-09 18:40:37 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">subscript_function</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of a function that defines what subscripting a value of the
|
|
|
|
data type does.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">internallength</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
A numeric constant that specifies the length in bytes of the new
|
|
|
|
type's internal representation. The default assumption is that
|
|
|
|
it is variable-length.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">alignment</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The storage alignment requirement of the data type. If specified,
|
|
|
|
it must be <literal>char</literal>, <literal>int2</literal>,
|
|
|
|
<literal>int4</literal>, or <literal>double</literal>; the
|
|
|
|
default is <literal>int4</literal>.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">storage</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The storage strategy for the data type. If specified, must be
|
2003-04-22 12:08:08 +02:00
|
|
|
<literal>plain</literal>, <literal>external</literal>,
|
|
|
|
<literal>extended</literal>, or <literal>main</literal>; the
|
|
|
|
default is <literal>plain</literal>.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2008-11-30 20:01:29 +01:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">like_type</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The name of an existing data type that the new type will have the
|
|
|
|
same representation as. The values of
|
|
|
|
<replaceable class="parameter">internallength</replaceable>,
|
|
|
|
<replaceable class="parameter">passedbyvalue</replaceable>,
|
|
|
|
<replaceable class="parameter">alignment</replaceable>, and
|
|
|
|
<replaceable class="parameter">storage</replaceable>
|
|
|
|
are copied from that type, unless overridden by explicit
|
2017-10-09 03:44:17 +02:00
|
|
|
specification elsewhere in this <command>CREATE TYPE</command> command.
|
2008-11-30 20:01:29 +01:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">category</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The category code (a single ASCII character) for this type.
|
2017-10-09 03:44:17 +02:00
|
|
|
The default is <literal>'U'</literal> for <quote>user-defined type</quote>.
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
Other standard category codes can be found in
|
2017-11-23 15:39:47 +01:00
|
|
|
<xref linkend="catalog-typcategory-table"/>. You may also choose
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
other ASCII characters in order to create custom categories.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">preferred</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
True if this type is a preferred type within its type category,
|
2008-07-30 21:35:13 +02:00
|
|
|
else false. The default is false. Be very careful about creating
|
|
|
|
a new preferred type within an existing type category, as this
|
|
|
|
could cause surprising changes in behavior.
|
Replace the hard-wired type knowledge in TypeCategory() and IsPreferredType()
with system catalog lookups, as was foreseen to be necessary almost since
their creation. Instead put the information into two new pg_type columns,
typcategory and typispreferred. Add support for setting these when
creating a user-defined base type.
The category column is just a "char" (i.e. a poor man's enum), allowing
a crude form of user extensibility of the category list: just use an
otherwise-unused character. This seems sufficient for foreseen uses,
but we could upgrade to having an actual category catalog someday, if
there proves to be a huge demand for custom type categories.
In this patch I have attempted to hew exactly to the behavior of the
previous hardwired logic, except for introducing new type categories for
arrays, composites, and enums. In particular the default preferred state
for user-defined types remains TRUE. That seems worth revisiting, but it
should be done as a separate patch from introducing the infrastructure.
Likewise, any adjustment of the standard set of categories should be done
separately.
2008-07-30 19:05:05 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">default</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The default value for the data type. If this is omitted, the
|
|
|
|
default is null.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
2003-05-09 00:19:58 +02:00
|
|
|
<term><replaceable class="parameter">element</replaceable></term>
|
2003-04-22 12:08:08 +02:00
|
|
|
<listitem>
|
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
The type being created is an array; this specifies the type of
|
|
|
|
the array elements.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">delimiter</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
The delimiter character to be used between values in arrays made
|
|
|
|
of this type.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
2011-03-02 22:11:05 +01:00
|
|
|
|
|
|
|
<varlistentry>
|
|
|
|
<term><replaceable class="parameter">collatable</replaceable></term>
|
|
|
|
<listitem>
|
|
|
|
<para>
|
|
|
|
True if this type's operations can use collation information.
|
|
|
|
The default is false.
|
|
|
|
</para>
|
|
|
|
</listitem>
|
|
|
|
</varlistentry>
|
2003-04-22 12:08:08 +02:00
|
|
|
</variablelist>
|
|
|
|
</refsect1>
|
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refsect1 id="sql-createtype-notes">
|
2001-09-13 21:05:29 +02:00
|
|
|
<title>Notes</title>
|
|
|
|
|
2006-01-13 19:06:45 +01:00
|
|
|
<para>
|
|
|
|
Because there are no restrictions on use of a data type once it's been
|
2011-11-19 00:23:55 +01:00
|
|
|
created, creating a base type or range type is tantamount to granting
|
|
|
|
public execute permission on the functions mentioned in the type definition.
|
2008-07-31 18:27:16 +02:00
|
|
|
This is usually
|
2006-01-13 19:06:45 +01:00
|
|
|
not an issue for the sorts of functions that are useful in a type
|
|
|
|
definition. But you might want to think twice before designing a type
|
2017-10-09 03:44:17 +02:00
|
|
|
in a way that would require <quote>secret</quote> information to be used
|
2006-01-13 19:06:45 +01:00
|
|
|
while converting it to or from external form.
|
|
|
|
</para>
|
|
|
|
|
2007-05-12 02:55:00 +02:00
|
|
|
<para>
|
|
|
|
Before <productname>PostgreSQL</productname> version 8.3, the name of
|
|
|
|
a generated array type was always exactly the element type's name with one
|
|
|
|
underscore character (<literal>_</literal>) prepended. (Type names were
|
2021-02-24 08:13:17 +01:00
|
|
|
therefore restricted in length to one fewer character than other names.)
|
2007-05-12 02:55:00 +02:00
|
|
|
While this is still usually the case, the array type name might vary from
|
|
|
|
this in case of maximum-length names or collisions with user type names
|
|
|
|
that begin with underscore. Writing code that depends on this convention
|
|
|
|
is therefore deprecated. Instead, use
|
2017-10-09 03:44:17 +02:00
|
|
|
<structname>pg_type</structname>.<structfield>typarray</structfield> to locate the array type
|
2007-05-12 02:55:00 +02:00
|
|
|
associated with a given type.
|
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
It might be advisable to avoid using type and table names that begin with
|
|
|
|
underscore. While the server will change generated array type names to
|
|
|
|
avoid collisions with user-given names, there is still risk of confusion,
|
|
|
|
particularly with old client software that might assume that type names
|
|
|
|
beginning with underscores always represent arrays.
|
|
|
|
</para>
|
|
|
|
|
2006-02-28 23:37:27 +01:00
|
|
|
<para>
|
2011-11-19 00:23:55 +01:00
|
|
|
Before <productname>PostgreSQL</productname> version 8.2, the shell-type
|
|
|
|
creation syntax
|
2017-10-09 03:44:17 +02:00
|
|
|
<literal>CREATE TYPE <replaceable>name</replaceable></literal> did not exist.
|
2006-02-28 23:37:27 +01:00
|
|
|
The way to create a new base type was to create its input function first.
|
|
|
|
In this approach, <productname>PostgreSQL</productname> will first see
|
|
|
|
the name of the new data type as the return type of the input function.
|
|
|
|
The shell type is implicitly created in this situation, and then it
|
|
|
|
can be referenced in the definitions of the remaining I/O functions.
|
Update reference documentation on may/can/might:
Standard English uses "may", "can", and "might" in different ways:
may - permission, "You may borrow my rake."
can - ability, "I can lift that log."
might - possibility, "It might rain today."
Unfortunately, in conversational English, their use is often mixed, as
in, "You may use this variable to do X", when in fact, "can" is a better
choice. Similarly, "It may crash" is better stated, "It might crash".
2007-02-01 00:26:05 +01:00
|
|
|
This approach still works, but is deprecated and might be disallowed in
|
2006-02-28 23:37:27 +01:00
|
|
|
some future release. Also, to avoid accidentally cluttering
|
|
|
|
the catalogs with shell types as a result of simple typos in function
|
|
|
|
definitions, a shell type will only be made this way when the input
|
|
|
|
function is written in C.
|
|
|
|
</para>
|
|
|
|
|
2022-12-09 15:58:38 +01:00
|
|
|
<para>
|
|
|
|
In <productname>PostgreSQL</productname> version 16 and later,
|
|
|
|
it is desirable for base types' input functions to
|
|
|
|
return <quote>soft</quote> errors using the
|
|
|
|
new <function>errsave()</function>/<function>ereturn()</function>
|
|
|
|
mechanism, rather than throwing <function>ereport()</function>
|
|
|
|
exceptions as in previous versions.
|
|
|
|
See <filename>src/backend/utils/fmgr/README</filename> for more
|
|
|
|
information.
|
|
|
|
</para>
|
|
|
|
|
2001-09-13 21:05:29 +02:00
|
|
|
</refsect1>
|
2006-12-30 22:21:56 +01:00
|
|
|
|
1998-07-29 08:23:26 +02:00
|
|
|
<refsect1 id="sql-createtype-examples">
|
|
|
|
<title>Examples</title>
|
2003-04-22 12:08:08 +02:00
|
|
|
|
1998-07-29 08:23:26 +02:00
|
|
|
<para>
|
2003-05-09 00:19:58 +02:00
|
|
|
This example creates a composite type and uses it in
|
|
|
|
a function definition:
|
|
|
|
<programlisting>
|
|
|
|
CREATE TYPE compfoo AS (f1 int, f2 text);
|
2005-01-04 01:39:53 +01:00
|
|
|
|
2004-05-17 01:22:08 +02:00
|
|
|
CREATE FUNCTION getfoo() RETURNS SETOF compfoo AS $$
|
|
|
|
SELECT fooid, fooname FROM foo
|
|
|
|
$$ LANGUAGE SQL;
|
2003-05-09 00:19:58 +02:00
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
2007-04-02 05:49:42 +02:00
|
|
|
<para>
|
|
|
|
This example creates an enumerated type and uses it in
|
|
|
|
a table definition:
|
|
|
|
<programlisting>
|
|
|
|
CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed');
|
|
|
|
|
|
|
|
CREATE TABLE bug (
|
2007-10-29 18:29:59 +01:00
|
|
|
id serial,
|
2007-04-02 05:49:42 +02:00
|
|
|
description text,
|
|
|
|
status bug_status
|
|
|
|
);
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
2011-11-19 00:23:55 +01:00
|
|
|
<para>
|
|
|
|
This example creates a range type:
|
|
|
|
<programlisting>
|
|
|
|
CREATE TYPE float8_range AS RANGE (subtype = float8, subtype_diff = float8mi);
|
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
2003-05-09 00:19:58 +02:00
|
|
|
<para>
|
|
|
|
This example creates the base data type <type>box</type> and then uses the
|
2001-01-14 00:58:55 +01:00
|
|
|
type in a table definition:
|
2001-09-13 21:05:29 +02:00
|
|
|
<programlisting>
|
2006-02-28 23:37:27 +01:00
|
|
|
CREATE TYPE box;
|
|
|
|
|
|
|
|
CREATE FUNCTION my_box_in_function(cstring) RETURNS box AS ... ;
|
|
|
|
CREATE FUNCTION my_box_out_function(box) RETURNS cstring AS ... ;
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
CREATE TYPE box (
|
|
|
|
INTERNALLENGTH = 16,
|
|
|
|
INPUT = my_box_in_function,
|
|
|
|
OUTPUT = my_box_out_function
|
|
|
|
);
|
|
|
|
|
|
|
|
CREATE TABLE myboxes (
|
|
|
|
id integer,
|
|
|
|
description box
|
|
|
|
);
|
2001-09-13 21:05:29 +02:00
|
|
|
</programlisting>
|
1999-07-22 17:09:15 +02:00
|
|
|
</para>
|
|
|
|
|
1998-07-29 08:23:26 +02:00
|
|
|
<para>
|
2003-04-22 12:08:08 +02:00
|
|
|
If the internal structure of <type>box</type> were an array of four
|
2017-10-09 03:44:17 +02:00
|
|
|
<type>float4</type> elements, we might instead use:
|
2001-09-13 21:05:29 +02:00
|
|
|
<programlisting>
|
2003-04-22 12:08:08 +02:00
|
|
|
CREATE TYPE box (
|
|
|
|
INTERNALLENGTH = 16,
|
|
|
|
INPUT = my_box_in_function,
|
|
|
|
OUTPUT = my_box_out_function,
|
|
|
|
ELEMENT = float4
|
|
|
|
);
|
2001-09-13 21:05:29 +02:00
|
|
|
</programlisting>
|
2003-04-22 12:08:08 +02:00
|
|
|
which would allow a box value's component numbers to be accessed
|
2001-11-03 22:42:47 +01:00
|
|
|
by subscripting. Otherwise the type behaves the same as before.
|
1999-07-06 19:16:42 +02:00
|
|
|
</para>
|
1999-07-22 17:09:15 +02:00
|
|
|
|
1998-07-29 08:23:26 +02:00
|
|
|
<para>
|
2001-11-03 22:42:47 +01:00
|
|
|
This example creates a large object type and uses it in
|
2001-01-14 00:58:55 +01:00
|
|
|
a table definition:
|
2001-09-13 21:05:29 +02:00
|
|
|
<programlisting>
|
2003-04-22 12:08:08 +02:00
|
|
|
CREATE TYPE bigobj (
|
|
|
|
INPUT = lo_filein, OUTPUT = lo_fileout,
|
|
|
|
INTERNALLENGTH = VARIABLE
|
|
|
|
);
|
|
|
|
CREATE TABLE big_objs (
|
|
|
|
id integer,
|
|
|
|
obj bigobj
|
|
|
|
);
|
2002-08-15 18:36:08 +02:00
|
|
|
</programlisting>
|
|
|
|
</para>
|
|
|
|
|
2003-04-22 12:08:08 +02:00
|
|
|
<para>
|
|
|
|
More examples, including suitable input and output functions, are
|
2017-11-23 15:39:47 +01:00
|
|
|
in <xref linkend="xtypes"/>.
|
2003-04-22 12:08:08 +02:00
|
|
|
</para>
|
2001-09-13 21:05:29 +02:00
|
|
|
</refsect1>
|
1998-07-29 08:23:26 +02:00
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refsect1 id="sql-createtype-compatibility">
|
2001-09-13 21:05:29 +02:00
|
|
|
<title>Compatibility</title>
|
1998-12-29 03:24:47 +01:00
|
|
|
|
2001-09-13 21:05:29 +02:00
|
|
|
<para>
|
2010-09-26 13:41:03 +02:00
|
|
|
The first form of the <command>CREATE TYPE</command> command, which
|
2017-10-09 03:44:17 +02:00
|
|
|
creates a composite type, conforms to the <acronym>SQL</acronym> standard.
|
2010-09-26 13:41:03 +02:00
|
|
|
The other forms are <productname>PostgreSQL</productname>
|
|
|
|
extensions. The <command>CREATE TYPE</command> statement in
|
2017-10-09 03:44:17 +02:00
|
|
|
the <acronym>SQL</acronym> standard also defines other forms that are not
|
|
|
|
implemented in <productname>PostgreSQL</productname>.
|
2010-09-26 13:41:03 +02:00
|
|
|
</para>
|
|
|
|
|
|
|
|
<para>
|
|
|
|
The ability to create a composite type with zero attributes is
|
|
|
|
a <productname>PostgreSQL</productname>-specific deviation from the
|
2011-11-19 00:23:55 +01:00
|
|
|
standard (analogous to the same case in <command>CREATE TABLE</command>).
|
2001-09-13 21:05:29 +02:00
|
|
|
</para>
|
1998-07-29 08:23:26 +02:00
|
|
|
</refsect1>
|
|
|
|
|
2017-10-20 03:16:39 +02:00
|
|
|
<refsect1 id="sql-createtype-see-also">
|
2001-09-13 21:05:29 +02:00
|
|
|
<title>See Also</title>
|
|
|
|
|
|
|
|
<simplelist type="inline">
|
2017-11-23 15:39:47 +01:00
|
|
|
<member><xref linkend="sql-altertype"/></member>
|
|
|
|
<member><xref linkend="sql-createdomain"/></member>
|
|
|
|
<member><xref linkend="sql-createfunction"/></member>
|
|
|
|
<member><xref linkend="sql-droptype"/></member>
|
2001-09-13 21:05:29 +02:00
|
|
|
</simplelist>
|
1998-12-29 03:24:47 +01:00
|
|
|
</refsect1>
|
2001-09-13 21:05:29 +02:00
|
|
|
|
1999-07-06 19:16:42 +02:00
|
|
|
</refentry>
|