From ab9907f5e5eba6e4e17a279d07a1a9df21ec5b19 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 29 Jul 2008 18:31:20 +0000 Subject: [PATCH] Add a new, improved version of citext as a contrib module. David E. Wheeler --- contrib/Makefile | 3 +- contrib/README | 4 + contrib/citext/Makefile | 17 + contrib/citext/citext.c | 268 +++++++ contrib/citext/citext.sql.in | 450 +++++++++++ contrib/citext/expected/citext.out | 1153 +++++++++++++++++++++++++++ contrib/citext/sql/citext.sql | 328 ++++++++ contrib/citext/uninstall_citext.sql | 69 ++ doc/src/sgml/citext.sgml | 222 ++++++ doc/src/sgml/contrib.sgml | 3 +- doc/src/sgml/filelist.sgml | 3 +- 11 files changed, 2517 insertions(+), 3 deletions(-) create mode 100644 contrib/citext/Makefile create mode 100644 contrib/citext/citext.c create mode 100644 contrib/citext/citext.sql.in create mode 100644 contrib/citext/expected/citext.out create mode 100644 contrib/citext/sql/citext.sql create mode 100644 contrib/citext/uninstall_citext.sql create mode 100644 doc/src/sgml/citext.sgml diff --git a/contrib/Makefile b/contrib/Makefile index b9453dad7e..56f6772ba1 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -1,4 +1,4 @@ -# $PostgreSQL: pgsql/contrib/Makefile,v 1.83 2008/05/08 16:49:36 tgl Exp $ +# $PostgreSQL: pgsql/contrib/Makefile,v 1.84 2008/07/29 18:31:20 tgl Exp $ subdir = contrib top_builddir = .. @@ -8,6 +8,7 @@ WANTED_DIRS = \ adminpack \ btree_gist \ chkpass \ + citext \ cube \ dblink \ dict_int \ diff --git a/contrib/README b/contrib/README index 142c696931..d1345f669a 100644 --- a/contrib/README +++ b/contrib/README @@ -36,6 +36,10 @@ chkpass - An auto-encrypted password datatype by D'Arcy J.M. Cain +citext - + A case-insensitive character string datatype + by David E. Wheeler + cube - Multidimensional-cube datatype (GiST indexing example) by Gene Selkov, Jr. diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile new file mode 100644 index 0000000000..d38e2762dd --- /dev/null +++ b/contrib/citext/Makefile @@ -0,0 +1,17 @@ +# $PostgreSQL: pgsql/contrib/citext/Makefile,v 1.1 2008/07/29 18:31:20 tgl Exp $ + +MODULES = citext +DATA_built = citext.sql +DATA = uninstall_citext.sql +REGRESS = citext + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/citext +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/citext/citext.c b/contrib/citext/citext.c new file mode 100644 index 0000000000..eb885c0037 --- /dev/null +++ b/contrib/citext/citext.c @@ -0,0 +1,268 @@ +/* + * $PostgreSQL: pgsql/contrib/citext/citext.c,v 1.1 2008/07/29 18:31:20 tgl Exp $ + */ +#include "postgres.h" + +#include "access/hash.h" +#include "fmgr.h" +#include "utils/builtins.h" +#include "utils/formatting.h" + +#ifdef PG_MODULE_MAGIC +PG_MODULE_MAGIC; +#endif + +/* + * ==================== + * FORWARD DECLARATIONS + * ==================== + */ + +static int32 citextcmp (text *left, text *right); +extern Datum citext_cmp (PG_FUNCTION_ARGS); +extern Datum citext_hash (PG_FUNCTION_ARGS); +extern Datum citext_eq (PG_FUNCTION_ARGS); +extern Datum citext_ne (PG_FUNCTION_ARGS); +extern Datum citext_gt (PG_FUNCTION_ARGS); +extern Datum citext_ge (PG_FUNCTION_ARGS); +extern Datum citext_lt (PG_FUNCTION_ARGS); +extern Datum citext_le (PG_FUNCTION_ARGS); +extern Datum citext_smaller (PG_FUNCTION_ARGS); +extern Datum citext_larger (PG_FUNCTION_ARGS); + +/* + * ================= + * UTILITY FUNCTIONS + * ================= + */ + +/* + * citextcmp() + * Internal comparison function for citext strings. + * Returns int32 negative, zero, or positive. + */ +static int32 +citextcmp (text *left, text *right) +{ + char *lcstr, *rcstr; + int32 result; + + lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left)); + rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right)); + + result = varstr_cmp(lcstr, strlen(lcstr), + rcstr, strlen(rcstr)); + + pfree(lcstr); + pfree(rcstr); + + return result; +} + +/* + * ================== + * INDEXING FUNCTIONS + * ================== + */ + +PG_FUNCTION_INFO_V1(citext_cmp); + +Datum +citext_cmp(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + int32 result; + + result = citextcmp(left, right); + + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_INT32(result); +} + +PG_FUNCTION_INFO_V1(citext_hash); + +Datum +citext_hash(PG_FUNCTION_ARGS) +{ + text *txt = PG_GETARG_TEXT_PP(0); + char *str; + Datum result; + + str = str_tolower(VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt)); + result = hash_any((unsigned char *) str, strlen(str)); + pfree(str); + + /* Avoid leaking memory for toasted inputs */ + PG_FREE_IF_COPY(txt, 0); + + PG_RETURN_DATUM(result); +} + +/* + * ================== + * OPERATOR FUNCTIONS + * ================== + */ + +PG_FUNCTION_INFO_V1(citext_eq); + +Datum +citext_eq(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + char *lcstr, *rcstr; + bool result; + + /* We can't compare lengths in advance of downcasing ... */ + + lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left)); + rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right)); + + /* + * Since we only care about equality or not-equality, we can + * avoid all the expense of strcoll() here, and just do bitwise + * comparison. + */ + result = (strcmp(lcstr, rcstr) == 0); + + pfree(lcstr); + pfree(rcstr); + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +PG_FUNCTION_INFO_V1(citext_ne); + +Datum +citext_ne(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + char *lcstr, *rcstr; + bool result; + + /* We can't compare lengths in advance of downcasing ... */ + + lcstr = str_tolower(VARDATA_ANY(left), VARSIZE_ANY_EXHDR(left)); + rcstr = str_tolower(VARDATA_ANY(right), VARSIZE_ANY_EXHDR(right)); + + /* + * Since we only care about equality or not-equality, we can + * avoid all the expense of strcoll() here, and just do bitwise + * comparison. + */ + result = (strcmp(lcstr, rcstr) != 0); + + pfree(lcstr); + pfree(rcstr); + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +PG_FUNCTION_INFO_V1(citext_lt); + +Datum +citext_lt(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + bool result; + + result = citextcmp(left, right) < 0; + + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +PG_FUNCTION_INFO_V1(citext_le); + +Datum +citext_le(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + bool result; + + result = citextcmp(left, right) <= 0; + + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +PG_FUNCTION_INFO_V1(citext_gt); + +Datum +citext_gt(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + bool result; + + result = citextcmp(left, right) > 0; + + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +PG_FUNCTION_INFO_V1(citext_ge); + +Datum +citext_ge(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + bool result; + + result = citextcmp(left, right) >= 0; + + PG_FREE_IF_COPY(left, 0); + PG_FREE_IF_COPY(right, 1); + + PG_RETURN_BOOL(result); +} + +/* + * =================== + * AGGREGATE FUNCTIONS + * =================== + */ + +PG_FUNCTION_INFO_V1(citext_smaller); + +Datum +citext_smaller(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + text *result; + + result = citextcmp(left, right) < 0 ? left : right; + PG_RETURN_TEXT_P(result); +} + +PG_FUNCTION_INFO_V1(citext_larger); + +Datum +citext_larger(PG_FUNCTION_ARGS) +{ + text *left = PG_GETARG_TEXT_PP(0); + text *right = PG_GETARG_TEXT_PP(1); + text *result; + + result = citextcmp(left, right) > 0 ? left : right; + PG_RETURN_TEXT_P(result); +} diff --git a/contrib/citext/citext.sql.in b/contrib/citext/citext.sql.in new file mode 100644 index 0000000000..c8d50cff52 --- /dev/null +++ b/contrib/citext/citext.sql.in @@ -0,0 +1,450 @@ +/* $PostgreSQL: pgsql/contrib/citext/citext.sql.in,v 1.1 2008/07/29 18:31:20 tgl Exp $ */ + +-- Adjust this setting to control where the objects get created. +SET search_path = public; + +-- +-- PostgreSQL code for CITEXT. +-- +-- Most I/O functions, and a few others, piggyback on the "text" type +-- functions via the implicit cast to text. +-- + +-- +-- Shell type to keep things a bit quieter. +-- + +CREATE TYPE citext; + +-- +-- Input and output functions. +-- +CREATE OR REPLACE FUNCTION citextin(cstring) +RETURNS citext +AS 'textin' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citextout(citext) +RETURNS cstring +AS 'textout' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citextrecv(internal) +RETURNS citext +AS 'textrecv' +LANGUAGE 'internal' STABLE STRICT; + +CREATE OR REPLACE FUNCTION citextsend(citext) +RETURNS bytea +AS 'textsend' +LANGUAGE 'internal' STABLE STRICT; + +-- +-- The type itself. +-- + +CREATE TYPE citext ( + INPUT = citextin, + OUTPUT = citextout, + RECEIVE = citextrecv, + SEND = citextsend, + INTERNALLENGTH = VARIABLE, + STORAGE = extended +); + +-- +-- A single cast function, since bpchar needs to have its whitespace trimmed +-- before it's cast to citext. +-- +CREATE OR REPLACE FUNCTION citext(bpchar) +RETURNS citext +AS 'rtrim1' +LANGUAGE 'internal' IMMUTABLE STRICT; + +-- +-- Implicit and assignment type casts. +-- + +CREATE CAST (citext AS text) WITHOUT FUNCTION AS IMPLICIT; +CREATE CAST (citext AS varchar) WITHOUT FUNCTION AS IMPLICIT; +CREATE CAST (citext AS bpchar) WITHOUT FUNCTION AS ASSIGNMENT; +CREATE CAST (text AS citext) WITHOUT FUNCTION AS ASSIGNMENT; +CREATE CAST (varchar AS citext) WITHOUT FUNCTION AS ASSIGNMENT; +CREATE CAST (bpchar AS citext) WITH FUNCTION citext(bpchar) AS ASSIGNMENT; + +-- +-- Operator Functions. +-- + +CREATE OR REPLACE FUNCTION citext_eq( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_ne( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_lt( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_le( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_gt( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_ge( citext, citext ) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +-- We overload || just to preserve "citext-ness" of the result. +CREATE OR REPLACE FUNCTION textcat(citext, citext) +RETURNS citext +AS 'textcat' +LANGUAGE 'internal' IMMUTABLE STRICT; + +-- +-- Operators. +-- + +CREATE OPERATOR = ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + COMMUTATOR = =, + NEGATOR = <>, + PROCEDURE = citext_eq, + RESTRICT = eqsel, + JOIN = eqjoinsel, + HASHES, + MERGES +); + +CREATE OPERATOR <> ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + NEGATOR = =, + COMMUTATOR = <>, + PROCEDURE = citext_ne, + RESTRICT = neqsel, + JOIN = neqjoinsel +); + +CREATE OPERATOR < ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + NEGATOR = >=, + COMMUTATOR = >, + PROCEDURE = citext_lt, + RESTRICT = scalarltsel, + JOIN = scalarltjoinsel +); + +CREATE OPERATOR <= ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + NEGATOR = >, + COMMUTATOR = >=, + PROCEDURE = citext_le, + RESTRICT = scalarltsel, + JOIN = scalarltjoinsel +); + +CREATE OPERATOR >= ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + NEGATOR = <, + COMMUTATOR = <=, + PROCEDURE = citext_ge, + RESTRICT = scalargtsel, + JOIN = scalargtjoinsel +); + +CREATE OPERATOR > ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + NEGATOR = <=, + COMMUTATOR = <, + PROCEDURE = citext_gt, + RESTRICT = scalargtsel, + JOIN = scalargtjoinsel +); + +CREATE OPERATOR || ( + LEFTARG = CITEXT, + RIGHTARG = CITEXT, + PROCEDURE = textcat +); + +-- +-- Support functions for indexing. +-- + +CREATE OR REPLACE FUNCTION citext_cmp(citext, citext) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION citext_hash(citext) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +-- +-- The btree indexing operator class. +-- + +CREATE OPERATOR CLASS citext_ops +DEFAULT FOR TYPE CITEXT USING btree AS + OPERATOR 1 < (citext, citext), + OPERATOR 2 <= (citext, citext), + OPERATOR 3 = (citext, citext), + OPERATOR 4 >= (citext, citext), + OPERATOR 5 > (citext, citext), + FUNCTION 1 citext_cmp(citext, citext); + +-- +-- The hash indexing operator class. +-- + +CREATE OPERATOR CLASS citext_ops +DEFAULT FOR TYPE citext USING hash AS + OPERATOR 1 = (citext, citext), + FUNCTION 1 citext_hash(citext); + +-- +-- Aggregates. +-- + +CREATE OR REPLACE FUNCTION citext_smaller(citext, citext) +RETURNS citext +AS 'MODULE_PATHNAME' +LANGUAGE 'C' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION citext_larger(citext, citext) +RETURNS citext +AS 'MODULE_PATHNAME' +LANGUAGE 'C' IMMUTABLE STRICT; + +CREATE AGGREGATE min(citext) ( + SFUNC = citext_smaller, + STYPE = citext, + SORTOP = < +); + +CREATE AGGREGATE max(citext) ( + SFUNC = citext_larger, + STYPE = citext, + SORTOP = > +); + +-- +-- Miscellaneous functions +-- These exist to preserve the "citext-ness" of the input. +-- + +CREATE OR REPLACE FUNCTION lower(citext) +RETURNS citext AS 'lower' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION upper(citext) +RETURNS citext AS 'upper' +LANGUAGE 'internal' IMMUTABLE STRICT; + +-- needed to avoid "function is not unique" errors +-- XXX find a better way to deal with this... +CREATE FUNCTION quote_literal(citext) +RETURNS text AS 'quote_literal' +LANGUAGE 'internal' IMMUTABLE STRICT; + +-- +-- CITEXT pattern matching. +-- + +CREATE OR REPLACE FUNCTION texticlike(citext, citext) +RETURNS bool AS 'texticlike' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticnlike(citext, citext) +RETURNS bool AS 'texticnlike' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticregexeq(citext, citext) +RETURNS bool AS 'texticregexeq' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticregexne(citext, citext) +RETURNS bool AS 'texticregexne' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OPERATOR ~ ( + PROCEDURE = texticregexeq, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = !~, + RESTRICT = icregexeqsel, + JOIN = icregexeqjoinsel +); + +CREATE OPERATOR ~* ( + PROCEDURE = texticregexeq, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = !~*, + RESTRICT = icregexeqsel, + JOIN = icregexeqjoinsel +); + +CREATE OPERATOR !~ ( + PROCEDURE = texticregexne, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = ~, + RESTRICT = icregexnesel, + JOIN = icregexnejoinsel +); + +CREATE OPERATOR !~* ( + PROCEDURE = texticregexne, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = ~*, + RESTRICT = icregexnesel, + JOIN = icregexnejoinsel +); + +CREATE OPERATOR ~~ ( + PROCEDURE = texticlike, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = !~~, + RESTRICT = iclikesel, + JOIN = iclikejoinsel +); + +CREATE OPERATOR ~~* ( + PROCEDURE = texticlike, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = !~~*, + RESTRICT = iclikesel, + JOIN = iclikejoinsel +); + +CREATE OPERATOR !~~ ( + PROCEDURE = texticnlike, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = ~~, + RESTRICT = icnlikesel, + JOIN = icnlikejoinsel +); + +CREATE OPERATOR !~~* ( + PROCEDURE = texticnlike, + LEFTARG = citext, + RIGHTARG = citext, + NEGATOR = ~~*, + RESTRICT = icnlikesel, + JOIN = icnlikejoinsel +); + +-- +-- Matching citext to text. +-- + +CREATE OR REPLACE FUNCTION texticlike(citext, text) +RETURNS bool AS 'texticlike' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticnlike(citext, text) +RETURNS bool AS 'texticnlike' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticregexeq(citext, text) +RETURNS bool AS 'texticregexeq' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OR REPLACE FUNCTION texticregexne(citext, text) +RETURNS bool AS 'texticregexne' +LANGUAGE 'internal' IMMUTABLE STRICT; + +CREATE OPERATOR ~ ( + PROCEDURE = texticregexeq, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = !~, + RESTRICT = icregexeqsel, + JOIN = icregexeqjoinsel +); + +CREATE OPERATOR ~* ( + PROCEDURE = texticregexeq, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = !~*, + RESTRICT = icregexeqsel, + JOIN = icregexeqjoinsel +); + +CREATE OPERATOR !~ ( + PROCEDURE = texticregexne, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = ~, + RESTRICT = icregexnesel, + JOIN = icregexnejoinsel +); + +CREATE OPERATOR !~* ( + PROCEDURE = texticregexne, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = ~*, + RESTRICT = icregexnesel, + JOIN = icregexnejoinsel +); + +CREATE OPERATOR ~~ ( + PROCEDURE = texticlike, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = !~~, + RESTRICT = iclikesel, + JOIN = iclikejoinsel +); + +CREATE OPERATOR ~~* ( + PROCEDURE = texticlike, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = !~~*, + RESTRICT = iclikesel, + JOIN = iclikejoinsel +); + +CREATE OPERATOR !~~ ( + PROCEDURE = texticnlike, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = ~~, + RESTRICT = icnlikesel, + JOIN = icnlikejoinsel +); + +CREATE OPERATOR !~~* ( + PROCEDURE = texticnlike, + LEFTARG = citext, + RIGHTARG = text, + NEGATOR = ~~*, + RESTRICT = icnlikesel, + JOIN = icnlikejoinsel +); diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out new file mode 100644 index 0000000000..7c207f4e9c --- /dev/null +++ b/contrib/citext/expected/citext.out @@ -0,0 +1,1153 @@ +-- +-- Test citext datatype +-- +-- +-- first, define the datatype. Turn off echoing so that expected file +-- does not depend on contents of citext.sql. +-- +SET client_min_messages = warning; +\set ECHO none +-- Test the operators and indexing functions +-- Test = and <>. +SELECT 'a'::citext = 'a'::citext AS t; + t +--- + t +(1 row) + +SELECT 'a'::citext = 'A'::citext AS t; + t +--- + t +(1 row) + +SELECT 'a'::citext = 'A'::text AS f; -- text wins the discussion + f +--- + f +(1 row) + +SELECT 'a'::citext = 'b'::citext AS f; + f +--- + f +(1 row) + +SELECT 'a'::citext = 'ab'::citext AS f; + f +--- + f +(1 row) + +SELECT 'a'::citext <> 'ab'::citext AS t; + t +--- + t +(1 row) + +-- Multibyte sanity tests. Uncomment to run. +-- SELECT 'À'::citext = 'À'::citext AS t; +-- SELECT 'À'::citext = 'à'::citext AS t; +-- SELECT 'À'::text = 'à'::text AS f; -- text wins. +-- SELECT 'À'::citext <> 'B'::citext AS t; +-- Test combining characters making up canonically equivalent strings. +-- SELECT 'Ä'::text <> 'Ä'::text AS t; +-- SELECT 'Ä'::citext <> 'Ä'::citext AS t; +-- Test the Turkish dotted I. The lowercase is a single byte while the +-- uppercase is multibyte. This is why the comparison code can't be optimized +-- to compare string lengths. +-- SELECT 'i'::citext = 'İ'::citext AS t; +-- Regression. +-- SELECT 'láska'::citext <> 'laská'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive; +-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative; +-- Test > and >= +SELECT 'B'::citext > 'a'::citext AS t; + t +--- + t +(1 row) + +SELECT 'b'::citext > 'A'::citext AS t; + t +--- + t +(1 row) + +SELECT 'B'::citext > 'b'::citext AS f; + f +--- + f +(1 row) + +SELECT 'B'::citext >= 'b'::citext AS t; + t +--- + t +(1 row) + +-- Test < and <= +SELECT 'a'::citext < 'B'::citext AS t; + t +--- + t +(1 row) + +SELECT 'a'::citext <= 'B'::citext AS t; + t +--- + t +(1 row) + +-- Test implicit casting. citext casts to text, but not vice-versa. +SELECT 'a'::citext = 'a'::text AS t; + t +--- + t +(1 row) + +SELECT 'A'::text <> 'a'::citext AS t; + t +--- + t +(1 row) + +SELECT 'B'::citext < 'a'::text AS t; -- text wins. + t +--- + t +(1 row) + +SELECT 'B'::citext <= 'a'::text AS t; -- text wins. + t +--- + t +(1 row) + +SELECT 'a'::citext > 'B'::text AS t; -- text wins. + t +--- + t +(1 row) + +SELECT 'a'::citext >= 'B'::text AS t; -- text wins. + t +--- + t +(1 row) + +-- Test implicit casting. citext casts to varchar, but not vice-versa. +SELECT 'a'::citext = 'a'::varchar AS t; + t +--- + t +(1 row) + +SELECT 'A'::varchar <> 'a'::citext AS t; + t +--- + t +(1 row) + +SELECT 'B'::citext < 'a'::varchar AS t; -- varchar wins. + t +--- + t +(1 row) + +SELECT 'B'::citext <= 'a'::varchar AS t; -- varchar wins. + t +--- + t +(1 row) + +SELECT 'a'::citext > 'B'::varchar AS t; -- varchar wins. + t +--- + t +(1 row) + +SELECT 'a'::citext >= 'B'::varchar AS t; -- varchar wins. + t +--- + t +(1 row) + +-- A couple of longer examlpes to ensure that we don't get any issues with bad +-- conversions to char[] in the c code. Yes, I did do this. +SELECT 'aardvark'::citext = 'aardvark'::citext AS t; + t +--- + t +(1 row) + +SELECT 'aardvark'::citext = 'aardVark'::citext AS t; + t +--- + t +(1 row) + +-- Check the citext_cmp() function explicitly. +SELECT citext_cmp('aardvark'::citext, 'aardvark'::citext) AS zero; + zero +------ + 0 +(1 row) + +SELECT citext_cmp('aardvark'::citext, 'aardVark'::citext) AS zero; + zero +------ + 0 +(1 row) + +SELECT citext_cmp('AARDVARK'::citext, 'AARDVARK'::citext) AS zero; + zero +------ + 0 +(1 row) + +SELECT citext_cmp('B'::citext, 'a'::citext) AS one; + one +----- + 1 +(1 row) + +-- Do some tests using a table and index. +CREATE TEMP TABLE try ( + name citext PRIMARY KEY +); +NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "try_pkey" for table "try" +INSERT INTO try (name) +VALUES ('a'), ('ab'), ('â'), ('aba'), ('b'), ('ba'), ('bab'), ('AZ'); +SELECT name, 'a' = name AS eq_a FROM try WHERE name <> 'â'; + name | eq_a +------+------ + a | t + ab | f + aba | f + b | f + ba | f + bab | f + AZ | f +(7 rows) + +SELECT name, 'a' = name AS t FROM try where name = 'a'; + name | t +------+--- + a | t +(1 row) + +SELECT name, 'A' = name AS "eq_A" FROM try WHERE name <> 'â'; + name | eq_A +------+------ + a | t + ab | f + aba | f + b | f + ba | f + bab | f + AZ | f +(7 rows) + +SELECT name, 'A' = name AS t FROM try where name = 'A'; + name | t +------+--- + a | t +(1 row) + +SELECT name, 'A' = name AS t FROM try where name = 'A'; + name | t +------+--- + a | t +(1 row) + +-- expected failures on duplicate key +INSERT INTO try (name) VALUES ('a'); +ERROR: duplicate key value violates unique constraint "try_pkey" +INSERT INTO try (name) VALUES ('A'); +ERROR: duplicate key value violates unique constraint "try_pkey" +INSERT INTO try (name) VALUES ('aB'); +ERROR: duplicate key value violates unique constraint "try_pkey" +-- Make sure that citext_smaller() and citext_lager() work properly. +SELECT citext_smaller( 'aa'::citext, 'ab'::citext ) = 'aa' AS t; + t +--- + t +(1 row) + +SELECT citext_smaller( 'AAAA'::citext, 'bbbb'::citext ) = 'AAAA' AS t; + t +--- + t +(1 row) + +SELECT citext_smaller( 'aardvark'::citext, 'Aaba'::citext ) = 'Aaba' AS t; + t +--- + t +(1 row) + +SELECT citext_smaller( 'aardvark'::citext, 'AARDVARK'::citext ) = 'AARDVARK' AS t; + t +--- + t +(1 row) + +SELECT citext_larger( 'aa'::citext, 'ab'::citext ) = 'ab' AS t; + t +--- + t +(1 row) + +SELECT citext_larger( 'AAAA'::citext, 'bbbb'::citext ) = 'bbbb' AS t; + t +--- + t +(1 row) + +SELECT citext_larger( 'aardvark'::citext, 'Aaba'::citext ) = 'aardvark' AS t; + t +--- + t +(1 row) + +-- Test aggregate functions and sort ordering +CREATE TEMP TABLE srt ( + name CITEXT +); +INSERT INTO srt (name) +VALUES ('aardvark'), + ('AAA'), + ('aba'), + ('ABC'), + ('abd'); +-- Check the min() and max() aggregates, with and without index. +set enable_seqscan = off; +SELECT MIN(name) AS "AAA" FROM srt; + AAA +----- + AAA +(1 row) + +SELECT MAX(name) AS abd FROM srt; + abd +----- + abd +(1 row) + +reset enable_seqscan; +set enable_indexscan = off; +SELECT MIN(name) AS "AAA" FROM srt; + AAA +----- + AAA +(1 row) + +SELECT MAX(name) AS abd FROM srt; + abd +----- + abd +(1 row) + +reset enable_indexscan; +-- Check sorting likewise +set enable_seqscan = off; +SELECT name FROM srt ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +reset enable_seqscan; +set enable_indexscan = off; +SELECT name FROM srt ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +reset enable_indexscan; +-- Test assignment casts. +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::text; + aaa +----- + aaa +(1 row) + +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::varchar; + aaa +----- + aaa +(1 row) + +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::bpchar; + aaa +----- + aaa +(1 row) + +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'; + aaa +----- + aaa +(1 row) + +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::citext; + aaa +----- + aaa +(1 row) + +-- LIKE shoudl be case-insensitive +SELECT name FROM srt WHERE name LIKE '%a%' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name NOT LIKE '%b%' ORDER BY name; + name +---------- + AAA + aardvark +(2 rows) + +SELECT name FROM srt WHERE name LIKE '%A%' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name NOT LIKE '%B%' ORDER BY name; + name +---------- + AAA + aardvark +(2 rows) + +-- ~~ should be case-insensitive +SELECT name FROM srt WHERE name ~~ '%a%' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name !~~ '%b%' ORDER BY name; + name +---------- + AAA + aardvark +(2 rows) + +SELECT name FROM srt WHERE name ~~ '%A%' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name !~~ '%B%' ORDER BY name; + name +---------- + AAA + aardvark +(2 rows) + +-- ~ should be case-insensitive +SELECT name FROM srt WHERE name ~ '^a' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name !~ 'a$' ORDER BY name; + name +---------- + aardvark + ABC + abd +(3 rows) + +SELECT name FROM srt WHERE name ~ '^A' ORDER BY name; + name +---------- + AAA + aardvark + aba + ABC + abd +(5 rows) + +SELECT name FROM srt WHERE name !~ 'A$' ORDER BY name; + name +---------- + aardvark + ABC + abd +(3 rows) + +-- SIMILAR TO should be case-insensitive. +SELECT name FROM srt WHERE name SIMILAR TO '%a.*'; + name +------ + AAA + aba +(2 rows) + +SELECT name FROM srt WHERE name SIMILAR TO '%A.*'; + name +------ + AAA + aba +(2 rows) + +-- Table 9-5. SQL String Functions and Operators +SELECT 'D'::citext || 'avid'::citext = 'David'::citext AS citext_concat; + citext_concat +--------------- + t +(1 row) + +SELECT 'Value: '::citext || 42 = 'Value: 42' AS text_concat; + text_concat +------------- + t +(1 row) + +SELECT 42 || ': value'::citext ='42: value' AS int_concat; + int_concat +------------ + t +(1 row) + +SELECT bit_length('jose'::citext) = 32 AS t; + t +--- + t +(1 row) + +SELECT bit_length( name ) = bit_length( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT textlen( name ) = textlen( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT char_length( name ) = char_length( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT lower( name ) = lower( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT octet_length( name ) = octet_length( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT overlay( name placing 'hom' from 2 for 4) = overlay( name::text placing 'hom' from 2 for 4) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT position( 'a' IN name ) = position( 'a' IN name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT substr('alphabet'::citext, 3) = 'phabet' AS t; + t +--- + t +(1 row) + +SELECT substr('alphabet'::citext, 3, 2) = 'ph' AS t; + t +--- + t +(1 row) + +SELECT substring('alphabet'::citext, 3) = 'phabet' AS t; + t +--- + t +(1 row) + +SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t; + t +--- + t +(1 row) + +SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t; + t +--- + t +(1 row) + +SELECT substring('Thomas'::citext from 2) = 'homas' AS t; + t +--- + t +(1 row) + +SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; + t +--- + t +(1 row) + +SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; + t +--- + t +(1 row) + +SELECT trim(' trim '::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT trim('xxxxxxtrimxxxx'::text, 'x'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT trim('xxxxxtrimxxxx'::text, 'x'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT upper( name ) = upper( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +-- Table 9-6. Other String Functions. +SELECT ascii( name ) = ascii( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT btrim(' trim'::citext ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT btrim('xxxxxtrimxxxx'::citext, 'x'::citext ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT btrim('xyxtrimyyx'::citext, 'xy'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT btrim('xyxtrimyyx'::text, 'xy'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT btrim('xyxtrimyyx'::citext, 'xy'::text ) = 'trim' AS t; + t +--- + t +(1 row) + +-- chr() takes an int and returns text. +-- convert() and convert_from take bytea and return text. +SELECT convert_to( name, 'ISO-8859-1' ) = convert_to( name::text, 'ISO-8859-1' ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT decode('MTIzAAE='::citext, 'base64') = decode('MTIzAAE='::text, 'base64') AS t; + t +--- + t +(1 row) + +-- encode() takes bytea and returns text. +SELECT initcap('hi THOMAS'::citext) = initcap('hi THOMAS'::text) AS t; + t +--- + t +(1 row) + +SELECT length( name ) = length( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT lpad('hi'::citext, 5 ) = ' hi' AS t; + t +--- + t +(1 row) + +SELECT lpad('hi'::citext, 5, 'xy'::citext) = 'xyxhi' AS t; + t +--- + t +(1 row) + +SELECT lpad('hi'::text, 5, 'xy'::citext) = 'xyxhi' AS t; + t +--- + t +(1 row) + +SELECT lpad('hi'::citext, 5, 'xy'::text ) = 'xyxhi' AS t; + t +--- + t +(1 row) + +SELECT ltrim(' trim'::citext ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT ltrim('zzzytrim'::citext, 'xyz'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT ltrim('zzzytrim'::text, 'xyz'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT ltrim('zzzytrim'::citext, 'xyz'::text ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT md5( name ) = md5( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +-- pg_client_encoding() takes no args and returns name. +SELECT quote_ident( name ) = quote_ident( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT quote_literal( name ) = quote_literal( name::text ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT regexp_matches('foobarbequebaz'::citext, '(bar)(beque)') = ARRAY[ 'bar', 'beque' ] AS t; + t +--- + t +(1 row) + +SELECT regexp_replace('Thomas'::citext, '.[mN]a.', 'M') = 'ThM' AS t; + t +--- + t +(1 row) + +SELECT regexp_split_to_array('hello world'::citext, E'\\s+') = ARRAY[ 'hello', 'world' ] AS t; + t +--- + t +(1 row) + +SELECT regexp_split_to_table('hello world'::citext, E'\\s+') AS words; + words +------- + hello + world +(2 rows) + +SELECT repeat('Pg'::citext, 4) = 'PgPgPgPg' AS t; + t +--- + t +(1 row) + +SELECT replace('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t; + t +--- + t +(1 row) + +SELECT rpad('hi'::citext, 5 ) = 'hi ' AS t; + t +--- + t +(1 row) + +SELECT rpad('hi'::citext, 5, 'xy'::citext) = 'hixyx' AS t; + t +--- + t +(1 row) + +SELECT rpad('hi'::text, 5, 'xy'::citext) = 'hixyx' AS t; + t +--- + t +(1 row) + +SELECT rpad('hi'::citext, 5, 'xy'::text ) = 'hixyx' AS t; + t +--- + t +(1 row) + +SELECT rtrim('trim '::citext ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT rtrim('trimxxxx'::citext, 'x'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT rtrim('trimxxxx'::text, 'x'::citext) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT rtrim('trimxxxx'::text, 'x'::text ) = 'trim' AS t; + t +--- + t +(1 row) + +SELECT split_part('abc~@~def~@~ghi'::citext, '~@~', 2) = 'def' AS t; + t +--- + t +(1 row) + +SELECT strpos('high'::citext, 'ig' ) = 2 AS t; + t +--- + t +(1 row) + +SELECT strpos('high'::citext, 'ig'::citext) = 2 AS t; + t +--- + t +(1 row) + +-- to_ascii() does not support UTF-8. +-- to_hex() takes a numeric argument. +SELECT substr('alphabet', 3, 2) = 'ph' AS t; + t +--- + t +(1 row) + +SELECT translate('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t; + t +--- + t +(1 row) + +-- TODO These functions should work case-insensitively, but don't. +SELECT regexp_matches('foobarbequebaz'::citext, '(BAR)(BEQUE)') = ARRAY[ 'bar', 'beque' ] AS "t TODO"; + t TODO +-------- +(0 rows) + +SELECT regexp_replace('Thomas'::citext, '.[MN]A.', 'M') = 'THM' AS "t TODO"; + t TODO +-------- + f +(1 row) + +SELECT regexp_split_to_array('helloTworld'::citext, 't') = ARRAY[ 'hello', 'world' ] AS "t TODO"; + t TODO +-------- + f +(1 row) + +SELECT regexp_split_to_table('helloTworld'::citext, 't') AS "words TODO"; + words TODO +------------- + helloTworld +(1 row) + +SELECT replace('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO"; + t TODO +-------- + f +(1 row) + +SELECT split_part('abcTdefTghi'::citext, 't', 2) = 'def' AS "t TODO"; + t TODO +-------- + f +(1 row) + +SELECT strpos('high'::citext, 'IG'::citext) = 2 AS "t TODO"; + t TODO +-------- + f +(1 row) + +SELECT translate('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO"; + t TODO +-------- + f +(1 row) + +-- Table 9-20. Formatting Functions +SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY'::citext) + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY') + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +SELECT to_date('05 Dec 2000', 'DD Mon YYYY'::citext) + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +SELECT to_number('12,454.8-'::citext, '99G999D9S'::citext) + = to_number('12,454.8-', '99G999D9S') AS t; + t +--- + t +(1 row) + +SELECT to_number('12,454.8-'::citext, '99G999D9S') + = to_number('12,454.8-', '99G999D9S') AS t; + t +--- + t +(1 row) + +SELECT to_number('12,454.8-', '99G999D9S'::citext) + = to_number('12,454.8-', '99G999D9S') AS t; + t +--- + t +(1 row) + +SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY'::citext) + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY') + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +SELECT to_timestamp('05 Dec 2000', 'DD Mon YYYY'::citext) + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; + t +--- + t +(1 row) + +-- Try assigning function results to a column. +SELECT COUNT(*) = 8::bigint AS t FROM try; + t +--- + t +(1 row) + +INSERT INTO try +VALUES ( to_char( now()::timestamp, 'HH12:MI:SS') ), + ( to_char( now() + '1 sec'::interval, 'HH12:MI:SS') ), -- timetamptz + ( to_char( '15h 2m 12s'::interval, 'HH24:MI:SS') ), + ( to_char( current_date, '999') ), + ( to_char( 125::int, '999') ), + ( to_char( 127::int4, '999') ), + ( to_char( 126::int8, '999') ), + ( to_char( 128.8::real, '999D9') ), + ( to_char( 125.7::float4, '999D9') ), + ( to_char( 125.9::float8, '999D9') ), + ( to_char( -125.8::numeric, '999D99S') ); +SELECT COUNT(*) = 19::bigint AS t FROM try; + t +--- + t +(1 row) + +SELECT like_escape( name, '' ) = like_escape( name::text, '' ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +SELECT like_escape( name::text, ''::citext ) =like_escape( name::text, '' ) AS t FROM srt; + t +--- + t + t + t + t + t +(5 rows) + +--- TODO: Get citext working with magic cast functions? +SELECT cidr( '192.168.1.2'::citext ) = cidr( '192.168.1.2'::text ) AS "t TODO"; +ERROR: function cidr(citext) does not exist +LINE 1: SELECT cidr( '192.168.1.2'::citext ) = cidr( '192.168.1.2'::... + ^ +HINT: No function matches the given name and argument types. You might need to add explicit type casts. +SELECT '192.168.1.2'::cidr::citext = '192.168.1.2'::cidr::text AS "t TODO"; +ERROR: cannot cast type cidr to citext +LINE 1: SELECT '192.168.1.2'::cidr::citext = '192.168.1.2'::cidr::te... + ^ diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql new file mode 100644 index 0000000000..04a297da02 --- /dev/null +++ b/contrib/citext/sql/citext.sql @@ -0,0 +1,328 @@ +-- +-- Test citext datatype +-- + +-- +-- first, define the datatype. Turn off echoing so that expected file +-- does not depend on contents of citext.sql. +-- +SET client_min_messages = warning; +\set ECHO none +\i citext.sql +RESET client_min_messages; +\set ECHO all + +-- Test the operators and indexing functions + +-- Test = and <>. +SELECT 'a'::citext = 'a'::citext AS t; +SELECT 'a'::citext = 'A'::citext AS t; +SELECT 'a'::citext = 'A'::text AS f; -- text wins the discussion +SELECT 'a'::citext = 'b'::citext AS f; +SELECT 'a'::citext = 'ab'::citext AS f; +SELECT 'a'::citext <> 'ab'::citext AS t; + +-- Multibyte sanity tests. Uncomment to run. +-- SELECT 'À'::citext = 'À'::citext AS t; +-- SELECT 'À'::citext = 'à'::citext AS t; +-- SELECT 'À'::text = 'à'::text AS f; -- text wins. +-- SELECT 'À'::citext <> 'B'::citext AS t; + +-- Test combining characters making up canonically equivalent strings. +-- SELECT 'Ä'::text <> 'Ä'::text AS t; +-- SELECT 'Ä'::citext <> 'Ä'::citext AS t; + +-- Test the Turkish dotted I. The lowercase is a single byte while the +-- uppercase is multibyte. This is why the comparison code can't be optimized +-- to compare string lengths. +-- SELECT 'i'::citext = 'İ'::citext AS t; + +-- Regression. +-- SELECT 'láska'::citext <> 'laská'::citext AS t; + +-- SELECT 'Ask Bjørn Hansen'::citext = 'Ask Bjørn Hansen'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext = 'ASK BJØRN HANSEN'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext <> 'Ask Bjorn Hansen'::citext AS t; +-- SELECT 'Ask Bjørn Hansen'::citext <> 'ASK BJORN HANSEN'::citext AS t; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ask bjørn hansen'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'ASK BJØRN HANSEN'::citext) AS zero; +-- SELECT citext_cmp('Ask Bjørn Hansen'::citext, 'Ask Bjorn Hansen'::citext) AS positive; +-- SELECT citext_cmp('Ask Bjorn Hansen'::citext, 'Ask Bjørn Hansen'::citext) AS negative; + +-- Test > and >= +SELECT 'B'::citext > 'a'::citext AS t; +SELECT 'b'::citext > 'A'::citext AS t; +SELECT 'B'::citext > 'b'::citext AS f; +SELECT 'B'::citext >= 'b'::citext AS t; + +-- Test < and <= +SELECT 'a'::citext < 'B'::citext AS t; +SELECT 'a'::citext <= 'B'::citext AS t; + +-- Test implicit casting. citext casts to text, but not vice-versa. +SELECT 'a'::citext = 'a'::text AS t; +SELECT 'A'::text <> 'a'::citext AS t; + +SELECT 'B'::citext < 'a'::text AS t; -- text wins. +SELECT 'B'::citext <= 'a'::text AS t; -- text wins. + +SELECT 'a'::citext > 'B'::text AS t; -- text wins. +SELECT 'a'::citext >= 'B'::text AS t; -- text wins. + +-- Test implicit casting. citext casts to varchar, but not vice-versa. +SELECT 'a'::citext = 'a'::varchar AS t; +SELECT 'A'::varchar <> 'a'::citext AS t; + +SELECT 'B'::citext < 'a'::varchar AS t; -- varchar wins. +SELECT 'B'::citext <= 'a'::varchar AS t; -- varchar wins. + +SELECT 'a'::citext > 'B'::varchar AS t; -- varchar wins. +SELECT 'a'::citext >= 'B'::varchar AS t; -- varchar wins. + +-- A couple of longer examlpes to ensure that we don't get any issues with bad +-- conversions to char[] in the c code. Yes, I did do this. + +SELECT 'aardvark'::citext = 'aardvark'::citext AS t; +SELECT 'aardvark'::citext = 'aardVark'::citext AS t; + +-- Check the citext_cmp() function explicitly. +SELECT citext_cmp('aardvark'::citext, 'aardvark'::citext) AS zero; +SELECT citext_cmp('aardvark'::citext, 'aardVark'::citext) AS zero; +SELECT citext_cmp('AARDVARK'::citext, 'AARDVARK'::citext) AS zero; +SELECT citext_cmp('B'::citext, 'a'::citext) AS one; + +-- Do some tests using a table and index. + +CREATE TEMP TABLE try ( + name citext PRIMARY KEY +); + +INSERT INTO try (name) +VALUES ('a'), ('ab'), ('â'), ('aba'), ('b'), ('ba'), ('bab'), ('AZ'); + +SELECT name, 'a' = name AS eq_a FROM try WHERE name <> 'â'; +SELECT name, 'a' = name AS t FROM try where name = 'a'; +SELECT name, 'A' = name AS "eq_A" FROM try WHERE name <> 'â'; +SELECT name, 'A' = name AS t FROM try where name = 'A'; +SELECT name, 'A' = name AS t FROM try where name = 'A'; + +-- expected failures on duplicate key +INSERT INTO try (name) VALUES ('a'); +INSERT INTO try (name) VALUES ('A'); +INSERT INTO try (name) VALUES ('aB'); + +-- Make sure that citext_smaller() and citext_lager() work properly. +SELECT citext_smaller( 'aa'::citext, 'ab'::citext ) = 'aa' AS t; +SELECT citext_smaller( 'AAAA'::citext, 'bbbb'::citext ) = 'AAAA' AS t; +SELECT citext_smaller( 'aardvark'::citext, 'Aaba'::citext ) = 'Aaba' AS t; +SELECT citext_smaller( 'aardvark'::citext, 'AARDVARK'::citext ) = 'AARDVARK' AS t; + +SELECT citext_larger( 'aa'::citext, 'ab'::citext ) = 'ab' AS t; +SELECT citext_larger( 'AAAA'::citext, 'bbbb'::citext ) = 'bbbb' AS t; +SELECT citext_larger( 'aardvark'::citext, 'Aaba'::citext ) = 'aardvark' AS t; + +-- Test aggregate functions and sort ordering + +CREATE TEMP TABLE srt ( + name CITEXT +); + +INSERT INTO srt (name) +VALUES ('aardvark'), + ('AAA'), + ('aba'), + ('ABC'), + ('abd'); + +-- Check the min() and max() aggregates, with and without index. +set enable_seqscan = off; +SELECT MIN(name) AS "AAA" FROM srt; +SELECT MAX(name) AS abd FROM srt; +reset enable_seqscan; +set enable_indexscan = off; +SELECT MIN(name) AS "AAA" FROM srt; +SELECT MAX(name) AS abd FROM srt; +reset enable_indexscan; + +-- Check sorting likewise +set enable_seqscan = off; +SELECT name FROM srt ORDER BY name; +reset enable_seqscan; +set enable_indexscan = off; +SELECT name FROM srt ORDER BY name; +reset enable_indexscan; + +-- Test assignment casts. +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::text; +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::varchar; +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::bpchar; +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'; +SELECT LOWER(name) as aaa FROM srt WHERE name = 'AAA'::citext; + +-- LIKE shoudl be case-insensitive +SELECT name FROM srt WHERE name LIKE '%a%' ORDER BY name; +SELECT name FROM srt WHERE name NOT LIKE '%b%' ORDER BY name; +SELECT name FROM srt WHERE name LIKE '%A%' ORDER BY name; +SELECT name FROM srt WHERE name NOT LIKE '%B%' ORDER BY name; + +-- ~~ should be case-insensitive +SELECT name FROM srt WHERE name ~~ '%a%' ORDER BY name; +SELECT name FROM srt WHERE name !~~ '%b%' ORDER BY name; +SELECT name FROM srt WHERE name ~~ '%A%' ORDER BY name; +SELECT name FROM srt WHERE name !~~ '%B%' ORDER BY name; + +-- ~ should be case-insensitive +SELECT name FROM srt WHERE name ~ '^a' ORDER BY name; +SELECT name FROM srt WHERE name !~ 'a$' ORDER BY name; +SELECT name FROM srt WHERE name ~ '^A' ORDER BY name; +SELECT name FROM srt WHERE name !~ 'A$' ORDER BY name; + +-- SIMILAR TO should be case-insensitive. +SELECT name FROM srt WHERE name SIMILAR TO '%a.*'; +SELECT name FROM srt WHERE name SIMILAR TO '%A.*'; + +-- Table 9-5. SQL String Functions and Operators +SELECT 'D'::citext || 'avid'::citext = 'David'::citext AS citext_concat; +SELECT 'Value: '::citext || 42 = 'Value: 42' AS text_concat; +SELECT 42 || ': value'::citext ='42: value' AS int_concat; +SELECT bit_length('jose'::citext) = 32 AS t; +SELECT bit_length( name ) = bit_length( name::text ) AS t FROM srt; +SELECT textlen( name ) = textlen( name::text ) AS t FROM srt; +SELECT char_length( name ) = char_length( name::text ) AS t FROM srt; +SELECT lower( name ) = lower( name::text ) AS t FROM srt; +SELECT octet_length( name ) = octet_length( name::text ) AS t FROM srt; +SELECT overlay( name placing 'hom' from 2 for 4) = overlay( name::text placing 'hom' from 2 for 4) AS t FROM srt; +SELECT position( 'a' IN name ) = position( 'a' IN name::text ) AS t FROM srt; + +SELECT substr('alphabet'::citext, 3) = 'phabet' AS t; +SELECT substr('alphabet'::citext, 3, 2) = 'ph' AS t; + +SELECT substring('alphabet'::citext, 3) = 'phabet' AS t; +SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t; +SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t; +SELECT substring('Thomas'::citext from 2) = 'homas' AS t; +SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; +SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; + +SELECT trim(' trim '::citext) = 'trim' AS t; +SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t; +SELECT trim('xxxxxxtrimxxxx'::text, 'x'::citext) = 'trim' AS t; +SELECT trim('xxxxxtrimxxxx'::text, 'x'::citext) = 'trim' AS t; + +SELECT upper( name ) = upper( name::text ) AS t FROM srt; + +-- Table 9-6. Other String Functions. +SELECT ascii( name ) = ascii( name::text ) AS t FROM srt; + +SELECT btrim(' trim'::citext ) = 'trim' AS t; +SELECT btrim('xxxxxtrimxxxx'::citext, 'x'::citext ) = 'trim' AS t; +SELECT btrim('xyxtrimyyx'::citext, 'xy'::citext) = 'trim' AS t; +SELECT btrim('xyxtrimyyx'::text, 'xy'::citext) = 'trim' AS t; +SELECT btrim('xyxtrimyyx'::citext, 'xy'::text ) = 'trim' AS t; + +-- chr() takes an int and returns text. +-- convert() and convert_from take bytea and return text. + +SELECT convert_to( name, 'ISO-8859-1' ) = convert_to( name::text, 'ISO-8859-1' ) AS t FROM srt; +SELECT decode('MTIzAAE='::citext, 'base64') = decode('MTIzAAE='::text, 'base64') AS t; +-- encode() takes bytea and returns text. +SELECT initcap('hi THOMAS'::citext) = initcap('hi THOMAS'::text) AS t; +SELECT length( name ) = length( name::text ) AS t FROM srt; + +SELECT lpad('hi'::citext, 5 ) = ' hi' AS t; +SELECT lpad('hi'::citext, 5, 'xy'::citext) = 'xyxhi' AS t; +SELECT lpad('hi'::text, 5, 'xy'::citext) = 'xyxhi' AS t; +SELECT lpad('hi'::citext, 5, 'xy'::text ) = 'xyxhi' AS t; + +SELECT ltrim(' trim'::citext ) = 'trim' AS t; +SELECT ltrim('zzzytrim'::citext, 'xyz'::citext) = 'trim' AS t; +SELECT ltrim('zzzytrim'::text, 'xyz'::citext) = 'trim' AS t; +SELECT ltrim('zzzytrim'::citext, 'xyz'::text ) = 'trim' AS t; + +SELECT md5( name ) = md5( name::text ) AS t FROM srt; +-- pg_client_encoding() takes no args and returns name. +SELECT quote_ident( name ) = quote_ident( name::text ) AS t FROM srt; +SELECT quote_literal( name ) = quote_literal( name::text ) AS t FROM srt; + +SELECT regexp_matches('foobarbequebaz'::citext, '(bar)(beque)') = ARRAY[ 'bar', 'beque' ] AS t; +SELECT regexp_replace('Thomas'::citext, '.[mN]a.', 'M') = 'ThM' AS t; +SELECT regexp_split_to_array('hello world'::citext, E'\\s+') = ARRAY[ 'hello', 'world' ] AS t; +SELECT regexp_split_to_table('hello world'::citext, E'\\s+') AS words; + +SELECT repeat('Pg'::citext, 4) = 'PgPgPgPg' AS t; +SELECT replace('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t; + +SELECT rpad('hi'::citext, 5 ) = 'hi ' AS t; +SELECT rpad('hi'::citext, 5, 'xy'::citext) = 'hixyx' AS t; +SELECT rpad('hi'::text, 5, 'xy'::citext) = 'hixyx' AS t; +SELECT rpad('hi'::citext, 5, 'xy'::text ) = 'hixyx' AS t; + +SELECT rtrim('trim '::citext ) = 'trim' AS t; +SELECT rtrim('trimxxxx'::citext, 'x'::citext) = 'trim' AS t; +SELECT rtrim('trimxxxx'::text, 'x'::citext) = 'trim' AS t; +SELECT rtrim('trimxxxx'::text, 'x'::text ) = 'trim' AS t; + +SELECT split_part('abc~@~def~@~ghi'::citext, '~@~', 2) = 'def' AS t; +SELECT strpos('high'::citext, 'ig' ) = 2 AS t; +SELECT strpos('high'::citext, 'ig'::citext) = 2 AS t; +-- to_ascii() does not support UTF-8. +-- to_hex() takes a numeric argument. +SELECT substr('alphabet', 3, 2) = 'ph' AS t; +SELECT translate('abcdefabcdef'::citext, 'cd', 'XX') = 'abXXefabXXef' AS t; + +-- TODO These functions should work case-insensitively, but don't. +SELECT regexp_matches('foobarbequebaz'::citext, '(BAR)(BEQUE)') = ARRAY[ 'bar', 'beque' ] AS "t TODO"; +SELECT regexp_replace('Thomas'::citext, '.[MN]A.', 'M') = 'THM' AS "t TODO"; +SELECT regexp_split_to_array('helloTworld'::citext, 't') = ARRAY[ 'hello', 'world' ] AS "t TODO"; +SELECT regexp_split_to_table('helloTworld'::citext, 't') AS "words TODO"; +SELECT replace('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO"; +SELECT split_part('abcTdefTghi'::citext, 't', 2) = 'def' AS "t TODO"; +SELECT strpos('high'::citext, 'IG'::citext) = 2 AS "t TODO"; +SELECT translate('abcdefabcdef'::citext, 'CD', 'XX') = 'abXXefabXXef' AS "t TODO"; + +-- Table 9-20. Formatting Functions +SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY'::citext) + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; +SELECT to_date('05 Dec 2000'::citext, 'DD Mon YYYY') + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; +SELECT to_date('05 Dec 2000', 'DD Mon YYYY'::citext) + = to_date('05 Dec 2000', 'DD Mon YYYY') AS t; + +SELECT to_number('12,454.8-'::citext, '99G999D9S'::citext) + = to_number('12,454.8-', '99G999D9S') AS t; +SELECT to_number('12,454.8-'::citext, '99G999D9S') + = to_number('12,454.8-', '99G999D9S') AS t; +SELECT to_number('12,454.8-', '99G999D9S'::citext) + = to_number('12,454.8-', '99G999D9S') AS t; + +SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY'::citext) + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; +SELECT to_timestamp('05 Dec 2000'::citext, 'DD Mon YYYY') + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; +SELECT to_timestamp('05 Dec 2000', 'DD Mon YYYY'::citext) + = to_timestamp('05 Dec 2000', 'DD Mon YYYY') AS t; + +-- Try assigning function results to a column. +SELECT COUNT(*) = 8::bigint AS t FROM try; +INSERT INTO try +VALUES ( to_char( now()::timestamp, 'HH12:MI:SS') ), + ( to_char( now() + '1 sec'::interval, 'HH12:MI:SS') ), -- timetamptz + ( to_char( '15h 2m 12s'::interval, 'HH24:MI:SS') ), + ( to_char( current_date, '999') ), + ( to_char( 125::int, '999') ), + ( to_char( 127::int4, '999') ), + ( to_char( 126::int8, '999') ), + ( to_char( 128.8::real, '999D9') ), + ( to_char( 125.7::float4, '999D9') ), + ( to_char( 125.9::float8, '999D9') ), + ( to_char( -125.8::numeric, '999D99S') ); + +SELECT COUNT(*) = 19::bigint AS t FROM try; + +SELECT like_escape( name, '' ) = like_escape( name::text, '' ) AS t FROM srt; +SELECT like_escape( name::text, ''::citext ) =like_escape( name::text, '' ) AS t FROM srt; + +--- TODO: Get citext working with magic cast functions? +SELECT cidr( '192.168.1.2'::citext ) = cidr( '192.168.1.2'::text ) AS "t TODO"; +SELECT '192.168.1.2'::cidr::citext = '192.168.1.2'::cidr::text AS "t TODO"; diff --git a/contrib/citext/uninstall_citext.sql b/contrib/citext/uninstall_citext.sql new file mode 100644 index 0000000000..58673260bb --- /dev/null +++ b/contrib/citext/uninstall_citext.sql @@ -0,0 +1,69 @@ +/* $PostgreSQL: pgsql/contrib/citext/uninstall_citext.sql,v 1.1 2008/07/29 18:31:20 tgl Exp $ */ + +-- Adjust this setting to control where the objects get dropped. +SET search_path = public; + +DROP OPERATOR CLASS citext_ops USING btree CASCADE; +DROP OPERATOR CLASS citext_ops USING hash CASCADE; + +DROP AGGREGATE min(citext); +DROP AGGREGATE max(citext); + +DROP OPERATOR = (citext, citext); +DROP OPERATOR <> (citext, citext); +DROP OPERATOR < (citext, citext); +DROP OPERATOR <= (citext, citext); +DROP OPERATOR >= (citext, citext); +DROP OPERATOR > (citext, citext); +DROP OPERATOR || (citext, citext); + +DROP OPERATOR ~ (citext, citext); +DROP OPERATOR ~* (citext, citext); +DROP OPERATOR !~ (citext, citext); +DROP OPERATOR !~* (citext, citext); +DROP OPERATOR ~~ (citext, citext); +DROP OPERATOR ~~* (citext, citext); +DROP OPERATOR !~~ (citext, citext); +DROP OPERATOR !~~* (citext, citext); + +DROP OPERATOR ~ (citext, text); +DROP OPERATOR ~* (citext, text); +DROP OPERATOR !~ (citext, text); +DROP OPERATOR !~* (citext, text); +DROP OPERATOR ~~ (citext, text); +DROP OPERATOR ~~* (citext, text); +DROP OPERATOR !~~ (citext, text); +DROP OPERATOR !~~* (citext, text); + +DROP CAST (citext AS text); +DROP CAST (citext AS varchar); +DROP CAST (citext AS bpchar); +DROP CAST (text AS citext); +DROP CAST (varchar AS citext); +DROP CAST (bpchar AS citext); + +DROP FUNCTION citext(bpchar); +DROP FUNCTION citext_eq(citext, citext); +DROP FUNCTION citext_ne(citext, citext); +DROP FUNCTION citext_lt(citext, citext); +DROP FUNCTION citext_le(citext, citext); +DROP FUNCTION citext_gt(citext, citext); +DROP FUNCTION citext_ge(citext, citext); +DROP FUNCTION textcat(citext, citext); +DROP FUNCTION citext_cmp(citext, citext); +DROP FUNCTION citext_hash(citext); +DROP FUNCTION citext_smaller(citext, citext); +DROP FUNCTION citext_larger(citext, citext); +DROP FUNCTION lower(citext); +DROP FUNCTION upper(citext); +DROP FUNCTION quote_literal(citext); +DROP FUNCTION texticlike(citext, citext); +DROP FUNCTION texticnlike(citext, citext); +DROP FUNCTION texticregexeq(citext, citext); +DROP FUNCTION texticregexne(citext, citext); +DROP FUNCTION texticlike(citext, text); +DROP FUNCTION texticnlike(citext, text); +DROP FUNCTION texticregexeq(citext, text); +DROP FUNCTION texticregexne(citext, text); + +DROP TYPE citext CASCADE; diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml new file mode 100644 index 0000000000..cb91da8897 --- /dev/null +++ b/doc/src/sgml/citext.sgml @@ -0,0 +1,222 @@ + + + + citext + + + citext + + + + The citext module provides a case-insensitive + character string type, citext. Essentially, it internally calls + lower when comparing values. Otherwise, it behaves almost + exactly like text. + + + + Rationale + + + The standard approach to doing case-insensitive matches + in PostgreSQL has been to use the lower + function when comparing values, for example + + + SELECT * FROM tab WHERE lower(col) = LOWER(?); + + + + + This works reasonably well, but has a number of drawbacks: + + + + + + It makes your SQL statements verbose, and you always have to remember to + use lower on both the column and the query value. + + + + + It won't use an index, unless you create a functional index using + lower. + + + + + If you declare a column as UNIQUE or PRIMARY + KEY, the implicitly generated index is case-sensitive. So it's + useless for case-insensitive searches, and it won't enforce + uniqueness case-insensitively. + + + + + + The citext data type allows you to eliminate calls + to lower in SQL queries, and allows a primary key to + be case-insensitive. citext is locale-aware, just + like text, which means that the comparison of uppercase and + lowercase characters is dependent on the rules of + the LC_CTYPE locale setting. Again, this behavior is + identical to the use of lower in queries. But because it's + done transparently by the datatype, you don't have to remember to do + anything special in your queries. + + + + + + How to Use It + + + Here's a simple example of usage: + + + CREATE TABLE users ( + nick CITEXT PRIMARY KEY, + pass TEXT NOT NULL + ); + + INSERT INTO users VALUES ( 'larry', md5(random()::text) ); + INSERT INTO users VALUES ( 'Tom', md5(random()::text) ); + INSERT INTO users VALUES ( 'Damian', md5(random()::text) ); + INSERT INTO users VALUES ( 'NEAL', md5(random()::text) ); + INSERT INTO users VALUES ( 'Bjørn', md5(random()::text) ); + + SELECT * FROM users WHERE nick = 'Larry'; + + + The SELECT statement will return one tuple, even though + the nick column was set to larry and the query + was for Larry. + + + + + Limitations + + + + + citext's behavior depends on + the LC_CTYPE setting of your database. How it compares + values is therefore determined when + initdb is run to create the cluster. It is not truly + case-insensitive in the terms defined by the Unicode standard. + Effectively, what this means is that, as long as you're happy with your + collation, you should be happy with citext's comparisons. But + if you have data in different languages stored in your database, users + of one language may find their query results are not as expected if the + collation is for another language. + + + + + + The pattern-matching comparison operators, such as LIKE, + ~, ~~, !~, !~~, etc., + have been overloaded to make case-insensitive comparisons when their + left-hand argument is of type citext. However, related + functions have not been changed, including: + + + + + + regexp_replace() + + + + + regexp_split_to_array() + + + + + regexp_split_to_table() + + + + + replace() + + + + + split_part() + + + + + strpos() + + + + + translate() + + + + + + Of course, for the regular expression functions, you can specify + case-insensitive comparisons in their flags arguments, but + the same cannot be done for the the non-regexp functions. + + + + + + citext is not as efficient as text because the + operator functions and the btree comparison functions must make copies + of the data and convert it to lower case for comparisons. It is, + however, slightly more efficient than using lower to get + case-insensitive matching. + + + + + + PostgreSQL supports casting between text + and any other type (even non-string types) by using the other type's + I/O functions. This doesn't work with citext. However, + you can cast via I/O functions in two steps, for example + somevalue::text::citext or + citextvalue::text::sometype. + + + + + + citext doesn't help much if you need data to compare + case-sensitively in some contexts and case-insensitively in other + contexts. The standard answer is to use the text type and + manually use the lower function when you need to compare + case-insensitively; this works all right if case-insensitive comparison + is needed only infrequently. If you need case-insensitive most of + the time and case-sensitive infrequently, consider storing the data + as citext and explicitly casting the column to text + when you want case-sensitive comparison. In either situation, you + will need two indexes if you want both types of searches to be fast. + + + + + + + Author + + + David E. Wheeler david@kineticode.com + + + + Inspired by the original citext module by Donald Fraser. + + + + + diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml index b9d78c1c76..bac5044205 100644 --- a/doc/src/sgml/contrib.sgml +++ b/doc/src/sgml/contrib.sgml @@ -1,4 +1,4 @@ - + Additional Supplied Modules @@ -81,6 +81,7 @@ psql -d dbname -f SHAREDIR/contrib/module.sql &adminpack; &btree-gist; &chkpass; + &citext; &cube; &dblink; &dict-int; diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml index b6305dc535..32aa90400b 100644 --- a/doc/src/sgml/filelist.sgml +++ b/doc/src/sgml/filelist.sgml @@ -1,4 +1,4 @@ - + @@ -94,6 +94,7 @@ +