postgresql/src/backend/utils/adt/varchar.c

749 lines
17 KiB
C

/*-------------------------------------------------------------------------
*
* varchar.c
* Functions for the built-in types char(n) and varchar(n).
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varchar.c,v 1.109 2005/04/12 04:26:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "mb/pg_wchar.h"
/*
* CHAR() and VARCHAR() types are part of the ANSI SQL standard. CHAR()
* is for blank-padded string whose length is specified in CREATE TABLE.
* VARCHAR is for storing string whose length is at most the length specified
* at CREATE TABLE time.
*
* It's hard to implement these types because we cannot figure out
* the length of the type from the type itself. I changed (hopefully all) the
* fmgr calls that invoke input functions of a data type to supply the
* length also. (eg. in INSERTs, we have the tupleDescriptor which contains
* the length of the attributes and hence the exact length of the char() or
* varchar(). We pass this to bpcharin() or varcharin().) In the case where
* we cannot determine the length, we pass in -1 instead and the input string
* must be null-terminated.
*
* We actually implement this as a varlena so that we don't have to pass in
* the length for the comparison functions. (The difference between these
* types and "text" is that we truncate and possibly blank-pad the string
* at insertion time.)
*
* - ay 6/95
*/
/*****************************************************************************
* bpchar - char() *
*****************************************************************************/
/*
* Convert a C string to CHARACTER internal representation. atttypmod
* is the declared length of the type plus VARHDRSZ.
*
* If the C string is too long, raise an error, unless the extra
* characters are spaces, in which case they're truncated. (per SQL)
*/
Datum
bpcharin(PG_FUNCTION_ARGS)
{
char *s = PG_GETARG_CSTRING(0);
#ifdef NOT_USED
Oid typelem = PG_GETARG_OID(1);
#endif
int32 atttypmod = PG_GETARG_INT32(2);
BpChar *result;
char *r;
size_t len,
maxlen;
int i;
int charlen; /* number of charcters in the input string */
/* verify encoding */
len = strlen(s);
pg_verifymbstr(s, len, false);
charlen = pg_mbstrlen(s);
/* If typmod is -1 (or invalid), use the actual string length */
if (atttypmod < (int32) VARHDRSZ)
maxlen = charlen;
else
maxlen = atttypmod - VARHDRSZ;
if (charlen > maxlen)
{
/* Verify that extra characters are spaces, and clip them off */
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
/*
* at this point, len is the actual BYTE length of the input
* string, maxlen is the max number of CHARACTERS allowed for this
* bpchar type.
*/
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
len = mbmaxlen;
else
ereport(ERROR,
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
errmsg("value too long for type character(%d)",
(int) maxlen)));
/*
* XXX: at this point, maxlen is the necessary byte length, not
* the number of CHARACTERS!
*/
maxlen = len;
}
else
{
/*
* XXX: at this point, maxlen is the necessary byte length, not
* the number of CHARACTERS!
*/
maxlen = len + (maxlen - charlen);
}
result = palloc(maxlen + VARHDRSZ);
VARATT_SIZEP(result) = maxlen + VARHDRSZ;
r = VARDATA(result);
for (i = 0; i < len; i++)
*r++ = *s++;
/* blank pad the string if necessary */
for (; i < maxlen; i++)
*r++ = ' ';
PG_RETURN_BPCHAR_P(result);
}
/*
* Convert a CHARACTER value to a C string.
*/
Datum
bpcharout(PG_FUNCTION_ARGS)
{
BpChar *s = PG_GETARG_BPCHAR_P(0);
char *result;
int len;
/* copy and add null term */
len = VARSIZE(s) - VARHDRSZ;
result = (char *) palloc(len + 1);
memcpy(result, VARDATA(s), len);
result[len] = '\0';
PG_RETURN_CSTRING(result);
}
/*
* bpcharrecv - converts external binary format to bpchar
*/
Datum
bpcharrecv(PG_FUNCTION_ARGS)
{
/* Exactly the same as textrecv, so share code */
return textrecv(fcinfo);
}
/*
* bpcharsend - converts bpchar to binary format
*/
Datum
bpcharsend(PG_FUNCTION_ARGS)
{
/* Exactly the same as textsend, so share code */
return textsend(fcinfo);
}
/*
* Converts a CHARACTER type to the specified size.
*
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
* isExplicit is true if this is for an explicit cast to char(N).
*
* Truncation rules: for an explicit cast, silently truncate to the given
* length; for an implicit cast, raise error unless extra characters are
* all spaces. (This is sort-of per SQL: the spec would actually have us
* raise a "completion condition" for the explicit cast case, but Postgres
* hasn't got such a concept.)
*/
Datum
bpchar(PG_FUNCTION_ARGS)
{
BpChar *source = PG_GETARG_BPCHAR_P(0);
int32 maxlen = PG_GETARG_INT32(1);
bool isExplicit = PG_GETARG_BOOL(2);
BpChar *result;
int32 len;
char *r;
char *s;
int i;
int charlen; /* number of charcters in the input string
* + VARHDRSZ */
len = VARSIZE(source);
charlen = pg_mbstrlen_with_len(VARDATA(source), len - VARHDRSZ) + VARHDRSZ;
/* No work if typmod is invalid or supplied data matches it already */
if (maxlen < (int32) VARHDRSZ || charlen == maxlen)
PG_RETURN_BPCHAR_P(source);
if (charlen > maxlen)
{
/* Verify that extra characters are spaces, and clip them off */
size_t maxmblen;
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
maxlen - VARHDRSZ) + VARHDRSZ;
if (!isExplicit)
{
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
if (*(VARDATA(source) + i) != ' ')
ereport(ERROR,
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
errmsg("value too long for type character(%d)",
maxlen - VARHDRSZ)));
}
len = maxmblen;
/*
* XXX: at this point, maxlen is the necessary byte
* length+VARHDRSZ, not the number of CHARACTERS!
*/
maxlen = len;
}
else
{
/*
* XXX: at this point, maxlen is the necessary byte
* length+VARHDRSZ, not the number of CHARACTERS!
*/
maxlen = len + (maxlen - charlen);
}
s = VARDATA(source);
result = palloc(maxlen);
VARATT_SIZEP(result) = maxlen;
r = VARDATA(result);
for (i = 0; i < len - VARHDRSZ; i++)
*r++ = *s++;
/* blank pad the string if necessary */
for (; i < maxlen - VARHDRSZ; i++)
*r++ = ' ';
PG_RETURN_BPCHAR_P(result);
}
/* char_bpchar()
* Convert char to bpchar(1).
*/
Datum
char_bpchar(PG_FUNCTION_ARGS)
{
char c = PG_GETARG_CHAR(0);
BpChar *result;
result = (BpChar *) palloc(VARHDRSZ + 1);
VARATT_SIZEP(result) = VARHDRSZ + 1;
*(VARDATA(result)) = c;
PG_RETURN_BPCHAR_P(result);
}
/* bpchar_name()
* Converts a bpchar() type to a NameData type.
*/
Datum
bpchar_name(PG_FUNCTION_ARGS)
{
BpChar *s = PG_GETARG_BPCHAR_P(0);
Name result;
int len;
len = VARSIZE(s) - VARHDRSZ;
/* Truncate to max length for a Name */
if (len >= NAMEDATALEN)
len = NAMEDATALEN - 1;
/* Remove trailing blanks */
while (len > 0)
{
if (*(VARDATA(s) + len - 1) != ' ')
break;
len--;
}
result = (NameData *) palloc(NAMEDATALEN);
memcpy(NameStr(*result), VARDATA(s), len);
/* Now null pad to full length... */
while (len < NAMEDATALEN)
{
*(NameStr(*result) + len) = '\0';
len++;
}
PG_RETURN_NAME(result);
}
/* name_bpchar()
* Converts a NameData type to a bpchar type.
*/
Datum
name_bpchar(PG_FUNCTION_ARGS)
{
Name s = PG_GETARG_NAME(0);
BpChar *result;
int len;
len = strlen(NameStr(*s));
result = (BpChar *) palloc(VARHDRSZ + len);
memcpy(VARDATA(result), NameStr(*s), len);
VARATT_SIZEP(result) = len + VARHDRSZ;
PG_RETURN_BPCHAR_P(result);
}
/*****************************************************************************
* varchar - varchar(n)
*
* Note: varchar piggybacks on type text for most operations, and so has no
* C-coded functions except for I/O and typmod checking.
*****************************************************************************/
/*
* Convert a C string to VARCHAR internal representation. atttypmod
* is the declared length of the type plus VARHDRSZ.
*
* Note that atttypmod is regarded as the number of characters, which
* is not necessarily the same as the number of bytes.
*
* If the C string is too long, raise an error, unless the extra characters
* are spaces, in which case they're truncated. (per SQL)
*/
Datum
varcharin(PG_FUNCTION_ARGS)
{
char *s = PG_GETARG_CSTRING(0);
#ifdef NOT_USED
Oid typelem = PG_GETARG_OID(1);
#endif
int32 atttypmod = PG_GETARG_INT32(2);
VarChar *result;
size_t len,
maxlen;
/* verify encoding */
len = strlen(s);
pg_verifymbstr(s, len, false);
maxlen = atttypmod - VARHDRSZ;
if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
{
/* Verify that extra characters are spaces, and clip them off */
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
len = mbmaxlen;
else
ereport(ERROR,
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
errmsg("value too long for type character varying(%d)",
(int) maxlen)));
}
result = palloc(len + VARHDRSZ);
VARATT_SIZEP(result) = len + VARHDRSZ;
memcpy(VARDATA(result), s, len);
PG_RETURN_VARCHAR_P(result);
}
/*
* Convert a VARCHAR value to a C string.
*/
Datum
varcharout(PG_FUNCTION_ARGS)
{
VarChar *s = PG_GETARG_VARCHAR_P(0);
char *result;
int32 len;
/* copy and add null term */
len = VARSIZE(s) - VARHDRSZ;
result = palloc(len + 1);
memcpy(result, VARDATA(s), len);
result[len] = '\0';
PG_RETURN_CSTRING(result);
}
/*
* varcharrecv - converts external binary format to varchar
*/
Datum
varcharrecv(PG_FUNCTION_ARGS)
{
/* Exactly the same as textrecv, so share code */
return textrecv(fcinfo);
}
/*
* varcharsend - converts varchar to binary format
*/
Datum
varcharsend(PG_FUNCTION_ARGS)
{
/* Exactly the same as textsend, so share code */
return textsend(fcinfo);
}
/*
* Converts a VARCHAR type to the specified size.
*
* maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
* isExplicit is true if this is for an explicit cast to varchar(N).
*
* Truncation rules: for an explicit cast, silently truncate to the given
* length; for an implicit cast, raise error unless extra characters are
* all spaces. (This is sort-of per SQL: the spec would actually have us
* raise a "completion condition" for the explicit cast case, but Postgres
* hasn't got such a concept.)
*/
Datum
varchar(PG_FUNCTION_ARGS)
{
VarChar *source = PG_GETARG_VARCHAR_P(0);
int32 maxlen = PG_GETARG_INT32(1);
bool isExplicit = PG_GETARG_BOOL(2);
VarChar *result;
int32 len;
size_t maxmblen;
int i;
len = VARSIZE(source);
/* No work if typmod is invalid or supplied data fits it already */
if (maxlen < (int32) VARHDRSZ || len <= maxlen)
PG_RETURN_VARCHAR_P(source);
/* only reach here if string is too long... */
/* truncate multibyte string preserving multibyte boundary */
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
maxlen - VARHDRSZ);
if (!isExplicit)
{
for (i = maxmblen; i < len - VARHDRSZ; i++)
if (*(VARDATA(source) + i) != ' ')
ereport(ERROR,
(errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
errmsg("value too long for type character varying(%d)",
maxlen - VARHDRSZ)));
}
len = maxmblen + VARHDRSZ;
result = palloc(len);
VARATT_SIZEP(result) = len;
memcpy(VARDATA(result), VARDATA(source), len - VARHDRSZ);
PG_RETURN_VARCHAR_P(result);
}
/*****************************************************************************
* Exported functions
*****************************************************************************/
/* "True" length (not counting trailing blanks) of a BpChar */
static int
bcTruelen(BpChar *arg)
{
char *s = VARDATA(arg);
int i;
int len;
len = VARSIZE(arg) - VARHDRSZ;
for (i = len - 1; i >= 0; i--)
{
if (s[i] != ' ')
break;
}
return i + 1;
}
Datum
bpcharlen(PG_FUNCTION_ARGS)
{
BpChar *arg = PG_GETARG_BPCHAR_P(0);
int len;
/* get number of bytes, ignoring trailing spaces */
len = bcTruelen(arg);
/* in multibyte encoding, convert to number of characters */
if (pg_database_encoding_max_length() != 1)
len = pg_mbstrlen_with_len(VARDATA(arg), len);
PG_RETURN_INT32(len);
}
Datum
bpcharoctetlen(PG_FUNCTION_ARGS)
{
BpChar *arg = PG_GETARG_BPCHAR_P(0);
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
}
/*****************************************************************************
* Comparison Functions used for bpchar
*
* Note: btree indexes need these routines not to leak memory; therefore,
* be careful to free working copies of toasted datums. Most places don't
* need to be so careful.
*****************************************************************************/
Datum
bpchareq(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
bool result;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
/* fast path for different-length inputs */
if (len1 != len2)
result = false;
else
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) == 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(result);
}
Datum
bpcharne(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
bool result;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
/* fast path for different-length inputs */
if (len1 != len2)
result = true;
else
result = (varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2) != 0);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(result);
}
Datum
bpcharlt(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(cmp < 0);
}
Datum
bpcharle(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(cmp <= 0);
}
Datum
bpchargt(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(cmp > 0);
}
Datum
bpcharge(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_BOOL(cmp >= 0);
}
Datum
bpcharcmp(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_FREE_IF_COPY(arg1, 0);
PG_FREE_IF_COPY(arg2, 1);
PG_RETURN_INT32(cmp);
}
Datum
bpchar_larger(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
}
Datum
bpchar_smaller(PG_FUNCTION_ARGS)
{
BpChar *arg1 = PG_GETARG_BPCHAR_P(0);
BpChar *arg2 = PG_GETARG_BPCHAR_P(1);
int len1,
len2;
int cmp;
len1 = bcTruelen(arg1);
len2 = bcTruelen(arg2);
cmp = varstr_cmp(VARDATA(arg1), len1, VARDATA(arg2), len2);
PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
}
/*
* bpchar needs a specialized hash function because we want to ignore
* trailing blanks in comparisons.
*
* XXX is there any need for locale-specific behavior here?
*/
Datum
hashbpchar(PG_FUNCTION_ARGS)
{
BpChar *key = PG_GETARG_BPCHAR_P(0);
char *keydata;
int keylen;
Datum result;
keydata = VARDATA(key);
keylen = bcTruelen(key);
result = hash_any((unsigned char *) keydata, keylen);
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
return result;
}