postgresql/src/backend/utils/adt/ascii.c

151 lines
3.5 KiB
C
Raw Normal View History

/*-----------------------------------------------------------------------
 * ascii.c
 *	 The PostgreSQL routine for string to ascii conversion.
 *
 *	 Portions Copyright (c) 1999-2005, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/ascii.c,v 1.25 2005/09/24 17:53:15 tgl Exp $
 *
 *-----------------------------------------------------------------------
 */
#include "postgres.h"
#include "utils/builtins.h"
#include "mb/pg_wchar.h"
#include "utils/ascii.h"
/*
 * Prototypes for the file-local helpers defined further down.
 */
static text *encode_to_ascii(text *data, int enc);
static void pg_to_ascii(unsigned char *src,
						unsigned char *src_end,
						unsigned char *dest,
						int enc);
/* ----------
2001-03-22 05:01:46 +01:00
* to_ascii
* ----------
*/
static void
pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
{
unsigned char *x;
const unsigned char *ascii;
int range;
2001-03-22 05:01:46 +01:00
/*
* relevant start for an encoding
*/
#define RANGE_128 128
#define RANGE_160 160
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
if (enc == PG_LATIN1)
{
/*
* ISO-8859-1 <range: 160 -- 255>
*/
ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
range = RANGE_160;
}
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
else if (enc == PG_LATIN2)
{
/*
* ISO-8859-2 <range: 160 -- 255>
*/
ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
range = RANGE_160;
}
else if (enc == PG_LATIN9)
{
/*
* ISO-8859-15 <range: 160 -- 255>
*/
ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
range = RANGE_160;
}
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
else if (enc == PG_WIN1250)
{
/*
* Window CP1250 <range: 128 -- 255>
*/
ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
range = RANGE_128;
}
else
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2004-08-29 07:07:03 +02:00
errmsg("encoding conversion from %s to ASCII not supported",
pg_encoding_to_char(enc))));
return; /* keep compiler quiet */
}
2001-03-22 05:01:46 +01:00
/*
* Encode
*/
for (x = src; x < src_end; x++)
{
2001-03-22 05:01:46 +01:00
if (*x < 128)
*dest++ = *x;
else if (*x < range)
*dest++ = ' '; /* bogus 128 to 'range' */
else
*dest++ = ascii[*x - range];
2001-03-22 05:01:46 +01:00
}
}
/* ----------
 * encode text
 *
 * Runs pg_to_ascii over the contents of 'data' using encoding 'enc' and
 * returns the very same datum.
 *
 * The conversion writes back into the buffer it reads from, so this
 * scheme only works for conversions that keep the string length
 * unchanged.
 * ----------
 */
static text *
encode_to_ascii(text *data, int enc)
{
	/* first content byte, and one past the end of the whole datum */
	unsigned char *payload = (unsigned char *) VARDATA(data);
	unsigned char *payload_end = (unsigned char *) (data) + VARSIZE(data);

	/* source and destination are deliberately the same buffer */
	pg_to_ascii(payload, payload_end, payload, enc);

	return data;
}
/* ----------
* convert to ASCII - enc is set as 'name' arg.
* ----------
*/
Datum
to_ascii_encname(PG_FUNCTION_ARGS)
{
2003-08-04 02:43:34 +02:00
text *data = PG_GETARG_TEXT_P_COPY(0);
int enc = pg_char_to_encoding(NameStr(*PG_GETARG_NAME(1)));
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
}
/* ----------
* convert to ASCII - enc is set as int4
* ----------
*/
2001-03-22 05:01:46 +01:00
Datum
to_ascii_enc(PG_FUNCTION_ARGS)
{
2003-08-04 02:43:34 +02:00
text *data = PG_GETARG_TEXT_P_COPY(0);
int enc = PG_GETARG_INT32(1);
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
}
/* ----------
* convert to ASCII - current enc is DatabaseEncoding
* ----------
*/
Datum
to_ascii_default(PG_FUNCTION_ARGS)
{
2003-08-04 02:43:34 +02:00
text *data = PG_GETARG_TEXT_P_COPY(0);
int enc = GetDatabaseEncoding();
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
}