2003-07-14 18:41:38 +02:00
|
|
|
/*-----------------------------------------------------------------------
|
2000-08-05 16:59:29 +02:00
|
|
|
* ascii.c
|
2003-07-14 18:41:38 +02:00
|
|
|
* The PostgreSQL routine for string to ascii conversion.
|
2000-08-05 16:59:29 +02:00
|
|
|
*
|
2004-08-29 06:13:13 +02:00
|
|
|
* Portions Copyright (c) 1999-2004, PostgreSQL Global Development Group
|
2000-08-05 16:59:29 +02:00
|
|
|
*
|
2003-07-14 18:41:38 +02:00
|
|
|
* IDENTIFICATION
|
2004-08-29 07:07:03 +02:00
|
|
|
* $PostgreSQL: pgsql/src/backend/utils/adt/ascii.c,v 1.22 2004/08/29 05:06:49 momjian Exp $
|
2000-08-05 16:59:29 +02:00
|
|
|
*
|
2003-07-14 18:41:38 +02:00
|
|
|
*-----------------------------------------------------------------------
|
2000-08-05 16:59:29 +02:00
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
2003-07-14 18:41:38 +02:00
|
|
|
|
2000-08-05 16:59:29 +02:00
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "mb/pg_wchar.h"
|
|
|
|
#include "utils/ascii.h"
|
|
|
|
|
2003-07-14 18:41:38 +02:00
|
|
|
static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
|
2003-08-04 02:43:34 +02:00
|
|
|
unsigned char *dest, int enc);
|
2000-08-05 16:59:29 +02:00
|
|
|
static text *encode_to_ascii(text *data, int enc);
|
|
|
|
|
2003-07-14 18:41:38 +02:00
|
|
|
|
2000-08-05 16:59:29 +02:00
|
|
|
/* ----------
|
2001-03-22 05:01:46 +01:00
|
|
|
* to_ascii
|
2000-08-05 16:59:29 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2003-07-14 18:41:38 +02:00
|
|
|
static void
|
|
|
|
pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
|
2000-08-05 16:59:29 +02:00
|
|
|
{
|
2003-04-02 23:07:59 +02:00
|
|
|
unsigned char *x;
|
|
|
|
unsigned char *ascii;
|
|
|
|
int range;
|
2001-03-22 05:01:46 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* relevant start for an encoding
|
|
|
|
*/
|
|
|
|
#define RANGE_128 128
|
|
|
|
#define RANGE_160 160
|
|
|
|
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
if (enc == PG_LATIN1)
|
2000-08-05 16:59:29 +02:00
|
|
|
{
|
2001-03-22 07:16:21 +01:00
|
|
|
/*
|
2000-08-05 16:59:29 +02:00
|
|
|
* ISO-8859-1 <range: 160 -- 255>
|
|
|
|
*/
|
|
|
|
ascii = " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
|
|
|
|
range = RANGE_160;
|
|
|
|
}
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
else if (enc == PG_LATIN2)
|
2000-08-05 16:59:29 +02:00
|
|
|
{
|
2001-03-22 07:16:21 +01:00
|
|
|
/*
|
2000-08-05 16:59:29 +02:00
|
|
|
* ISO-8859-2 <range: 160 -- 255>
|
|
|
|
*/
|
|
|
|
ascii = " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
|
|
|
|
range = RANGE_160;
|
|
|
|
}
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
else if (enc == PG_WIN1250)
|
2000-08-05 16:59:29 +02:00
|
|
|
{
|
2001-03-22 07:16:21 +01:00
|
|
|
/*
|
2000-08-05 16:59:29 +02:00
|
|
|
* Window CP1250 <range: 128 -- 255>
|
|
|
|
*/
|
|
|
|
ascii = " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
|
|
|
|
range = RANGE_128;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2003-07-27 06:53:12 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
2004-08-29 07:07:03 +02:00
|
|
|
errmsg("encoding conversion from %s to ASCII not supported",
|
|
|
|
pg_encoding_to_char(enc))));
|
2003-07-14 18:41:38 +02:00
|
|
|
return; /* keep compiler quiet */
|
2000-08-05 16:59:29 +02:00
|
|
|
}
|
2001-03-22 05:01:46 +01:00
|
|
|
|
2001-03-22 07:16:21 +01:00
|
|
|
/*
|
2000-08-05 16:59:29 +02:00
|
|
|
* Encode
|
|
|
|
*/
|
2003-04-02 23:07:59 +02:00
|
|
|
for (x = src; x < src_end; x++)
|
2000-08-05 16:59:29 +02:00
|
|
|
{
|
2001-03-22 05:01:46 +01:00
|
|
|
if (*x < 128)
|
2003-07-14 18:41:38 +02:00
|
|
|
*dest++ = *x;
|
2000-08-05 16:59:29 +02:00
|
|
|
else if (*x < range)
|
2003-07-14 18:41:38 +02:00
|
|
|
*dest++ = ' '; /* bogus 128 to 'range' */
|
2000-08-05 16:59:29 +02:00
|
|
|
else
|
2003-07-14 18:41:38 +02:00
|
|
|
*dest++ = ascii[*x - range];
|
2001-03-22 05:01:46 +01:00
|
|
|
}
|
2000-08-05 16:59:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* encode text
|
2003-07-14 18:41:38 +02:00
|
|
|
*
|
|
|
|
* The text datum is overwritten in-place, therefore this coding method
|
|
|
|
* cannot support conversions that change the string length!
|
2000-08-05 16:59:29 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
static text *
|
|
|
|
encode_to_ascii(text *data, int enc)
|
|
|
|
{
|
2003-07-14 18:41:38 +02:00
|
|
|
pg_to_ascii((unsigned char *) VARDATA(data), /* src */
|
2003-08-04 02:43:34 +02:00
|
|
|
(unsigned char *) (data) + VARSIZE(data), /* src end */
|
2003-07-14 18:41:38 +02:00
|
|
|
(unsigned char *) VARDATA(data), /* dest */
|
2001-03-22 05:01:46 +01:00
|
|
|
enc); /* encoding */
|
|
|
|
|
2000-08-05 16:59:29 +02:00
|
|
|
return data;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* convert to ASCII - enc is set as 'name' arg.
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
to_ascii_encname(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2003-08-04 02:43:34 +02:00
|
|
|
text *data = PG_GETARG_TEXT_P_COPY(0);
|
|
|
|
int enc = pg_char_to_encoding(NameStr(*PG_GETARG_NAME(1)));
|
2003-07-14 18:41:38 +02:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
|
2000-08-05 16:59:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* convert to ASCII - enc is set as int4
|
|
|
|
* ----------
|
|
|
|
*/
|
2001-03-22 05:01:46 +01:00
|
|
|
Datum
|
2000-08-05 16:59:29 +02:00
|
|
|
to_ascii_enc(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2003-08-04 02:43:34 +02:00
|
|
|
text *data = PG_GETARG_TEXT_P_COPY(0);
|
|
|
|
int enc = PG_GETARG_INT32(1);
|
2003-07-14 18:41:38 +02:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
|
2000-08-05 16:59:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ----------
|
|
|
|
* convert to ASCII - current enc is DatabaseEncoding
|
|
|
|
* ----------
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
to_ascii_default(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2003-08-04 02:43:34 +02:00
|
|
|
text *data = PG_GETARG_TEXT_P_COPY(0);
|
|
|
|
int enc = GetDatabaseEncoding();
|
2003-07-14 18:41:38 +02:00
|
|
|
|
|
|
|
PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
|
2000-08-05 16:59:29 +02:00
|
|
|
}
|