2014-02-23 21:22:50 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* mbutils.c
|
|
|
|
* This file contains functions for encoding conversion.
|
|
|
|
*
|
|
|
|
* The string-conversion functions in this file share some API quirks.
|
|
|
|
* Note the following:
|
|
|
|
*
|
|
|
|
* The functions return a palloc'd, null-terminated string if conversion
|
|
|
|
* is required. However, if no conversion is performed, the given source
|
|
|
|
* string pointer is returned as-is.
|
|
|
|
*
|
|
|
|
* Although the presence of a length argument means that callers can pass
|
|
|
|
* non-null-terminated strings, care is required because the same string
|
|
|
|
* will be passed back if no conversion occurs. Such callers *must* check
|
|
|
|
* whether result == src and handle that case differently.
|
|
|
|
*
|
|
|
|
* If the source and destination encodings are the same, the source string
|
|
|
|
* is returned without any verification; it's assumed to be valid data.
|
|
|
|
* If that might not be the case, the caller is responsible for validating
|
|
|
|
* the string using a separate call to pg_verify_mbstr(). Whenever the
|
|
|
|
* source and destination encodings are different, the functions ensure that
|
|
|
|
* the result is validly encoded according to the destination encoding.
|
|
|
|
*
|
|
|
|
*
|
2019-01-02 18:44:25 +01:00
|
|
|
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
|
2014-02-23 21:22:50 +01:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
2009-04-02 19:30:53 +02:00
|
|
|
*
|
2002-11-02 19:41:22 +01:00
|
|
|
*
|
2014-02-23 21:22:50 +01:00
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/utils/mb/mbutils.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
2001-02-10 03:31:31 +01:00
|
|
|
*/
|
1999-07-17 18:25:28 +02:00
|
|
|
#include "postgres.h"
|
2002-11-02 19:41:22 +01:00
|
|
|
|
2002-07-25 12:07:13 +02:00
|
|
|
#include "access/xact.h"
|
2006-01-12 23:04:02 +01:00
|
|
|
#include "catalog/namespace.h"
|
1998-07-24 05:32:46 +02:00
|
|
|
#include "mb/pg_wchar.h"
|
2000-06-13 09:35:40 +02:00
|
|
|
#include "utils/builtins.h"
|
2002-08-08 08:35:26 +02:00
|
|
|
#include "utils/memutils.h"
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
#include "utils/syscache.h"
|
1998-07-24 05:32:46 +02:00
|
|
|
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
/*
|
2009-04-02 19:30:53 +02:00
|
|
|
* We maintain a simple linked list caching the fmgr lookup info for the
|
|
|
|
* currently selected conversion functions, as well as any that have been
|
2014-05-06 18:12:18 +02:00
|
|
|
* selected previously in the current session. (We remember previous
|
2009-04-02 19:30:53 +02:00
|
|
|
* settings because we must be able to restore a previous setting during
|
|
|
|
* transaction rollback, without doing any fresh catalog accesses.)
|
|
|
|
*
|
|
|
|
* Since we'll never release this data, we just keep it in TopMemoryContext.
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
*/
|
2009-04-02 19:30:53 +02:00
|
|
|
typedef struct ConvProcInfo
|
|
|
|
{
|
|
|
|
int s_encoding; /* server and client encoding IDs */
|
|
|
|
int c_encoding;
|
2009-06-11 16:49:15 +02:00
|
|
|
FmgrInfo to_server_info; /* lookup info for conversion procs */
|
2009-04-02 19:30:53 +02:00
|
|
|
FmgrInfo to_client_info;
|
|
|
|
} ConvProcInfo;
|
|
|
|
|
|
|
|
/* Cache of conversion lookups; PrepareClientEncoding pushes new entries at the head */
static List *ConvProcList = NIL; /* List of ConvProcInfo */
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
|
2001-08-15 09:07:40 +02:00
|
|
|
/*
|
2009-04-02 19:30:53 +02:00
|
|
|
* These variables point to the currently active conversion functions,
|
|
|
|
* or are NULL when no conversion is needed.
|
2001-08-15 09:07:40 +02:00
|
|
|
*/
|
2002-11-02 19:41:22 +01:00
|
|
|
/* Active client-to-server conversion proc; points into a ConvProcList entry, or NULL */
static FmgrInfo *ToServerConvProc = NULL;
|
|
|
|
/* Active server-to-client conversion proc; points into a ConvProcList entry, or NULL */
static FmgrInfo *ToClientConvProc = NULL;
|
2002-08-08 08:35:26 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
/*
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
* These variables track the currently-selected encodings.
|
2009-04-02 19:30:53 +02:00
|
|
|
*/
|
2014-01-18 22:04:11 +01:00
|
|
|
/* Current client encoding; updated by SetClientEncoding(), SQL_ASCII until then */
static const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
|
|
|
|
/* NOTE(review): presumably the encoding reported by GetDatabaseEncoding() -- confirm */
static const pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
|
|
|
|
/* Actual encoding of message text, which need not match the database encoding */
static const pg_enc2name *MessageEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
|
2009-04-02 19:30:53 +02:00
|
|
|
|
2003-04-27 19:31:25 +02:00
|
|
|
/*
|
|
|
|
* During backend startup we can't set client encoding because we (a)
|
|
|
|
* can't look up the conversion functions, and (b) may not know the database
|
|
|
|
* encoding yet either. So SetClientEncoding() just accepts anything and
|
|
|
|
* remembers it for InitializeClientEncoding() to apply later.
|
|
|
|
*/
|
|
|
|
/* Set to true (exactly once) by InitializeClientEncoding() */
static bool backend_startup_complete = false;
|
|
|
|
/* Encoding remembered by SetClientEncoding() while startup is still in progress */
static int pending_client_encoding = PG_SQL_ASCII;
|
|
|
|
|
|
|
|
|
2002-08-08 08:35:26 +02:00
|
|
|
/* Internal functions */
|
2005-09-24 19:53:28 +02:00
|
|
|
static char *perform_default_encoding_conversion(const char *src,
|
2002-11-02 19:41:22 +01:00
|
|
|
int len, bool is_client_to_server);
|
2005-09-24 19:53:28 +02:00
|
|
|
static int cliplen(const char *str, int len, int limit);
|
2002-08-08 08:35:26 +02:00
|
|
|
|
2002-08-29 09:22:30 +02:00
|
|
|
|
2002-08-08 08:35:26 +02:00
|
|
|
/*
|
2014-05-06 18:12:18 +02:00
|
|
|
* Prepare for a future call to SetClientEncoding. Success should mean
|
2011-04-07 06:11:01 +02:00
|
|
|
* that SetClientEncoding is guaranteed to succeed for this encoding request.
|
|
|
|
*
|
|
|
|
* (But note that success before backend_startup_complete does not guarantee
|
|
|
|
* success after ...)
|
|
|
|
*
|
|
|
|
* Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
|
2003-04-27 19:31:25 +02:00
|
|
|
*/
|
2001-08-15 09:07:40 +02:00
|
|
|
int
|
2011-04-07 06:11:01 +02:00
|
|
|
PrepareClientEncoding(int encoding)
|
2001-08-15 09:07:40 +02:00
|
|
|
{
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
int current_server_encoding;
|
2009-04-02 19:30:53 +02:00
|
|
|
ListCell *lc;
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
if (!PG_VALID_FE_ENCODING(encoding))
|
2006-01-11 09:43:13 +01:00
|
|
|
return -1;
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2003-04-27 19:31:25 +02:00
|
|
|
/* Can't do anything during startup, per notes above */
|
|
|
|
if (!backend_startup_complete)
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
return 0;
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
|
2003-04-27 19:31:25 +02:00
|
|
|
current_server_encoding = GetDatabaseEncoding();
|
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
/*
|
2003-04-27 19:31:25 +02:00
|
|
|
* Check for cases that require no conversion function.
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
*/
|
2003-04-27 19:31:25 +02:00
|
|
|
if (current_server_encoding == encoding ||
|
2006-01-11 07:59:22 +01:00
|
|
|
current_server_encoding == PG_SQL_ASCII ||
|
|
|
|
encoding == PG_SQL_ASCII)
|
2003-04-27 19:31:25 +02:00
|
|
|
return 0;
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
if (IsTransactionState())
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If we're in a live transaction, it's safe to access the catalogs,
|
2009-06-11 16:49:15 +02:00
|
|
|
* so look up the functions. We repeat the lookup even if the info is
|
|
|
|
* already cached, so that we can react to changes in the contents of
|
|
|
|
* pg_conversion.
|
2009-04-02 19:30:53 +02:00
|
|
|
*/
|
|
|
|
Oid to_server_proc,
|
|
|
|
to_client_proc;
|
|
|
|
ConvProcInfo *convinfo;
|
|
|
|
MemoryContext oldcontext;
|
|
|
|
|
|
|
|
to_server_proc = FindDefaultConversionProc(encoding,
|
|
|
|
current_server_encoding);
|
|
|
|
if (!OidIsValid(to_server_proc))
|
|
|
|
return -1;
|
|
|
|
to_client_proc = FindDefaultConversionProc(current_server_encoding,
|
|
|
|
encoding);
|
|
|
|
if (!OidIsValid(to_client_proc))
|
|
|
|
return -1;
|
2003-04-27 20:01:46 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
/*
|
|
|
|
* Load the fmgr info into TopMemoryContext (could still fail here)
|
|
|
|
*/
|
|
|
|
convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
|
|
|
|
sizeof(ConvProcInfo));
|
|
|
|
convinfo->s_encoding = current_server_encoding;
|
|
|
|
convinfo->c_encoding = encoding;
|
|
|
|
fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
|
|
|
|
TopMemoryContext);
|
|
|
|
fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
|
|
|
|
TopMemoryContext);
|
|
|
|
|
|
|
|
/* Attach new info to head of list */
|
|
|
|
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
|
|
|
|
ConvProcList = lcons(convinfo, ConvProcList);
|
|
|
|
MemoryContextSwitchTo(oldcontext);
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
/*
|
2011-04-07 06:11:01 +02:00
|
|
|
* We cannot yet remove any older entry for the same encoding pair,
|
2014-05-06 18:12:18 +02:00
|
|
|
* since it could still be in use. SetClientEncoding will clean up.
|
2009-04-02 19:30:53 +02:00
|
|
|
*/
|
|
|
|
|
|
|
|
return 0; /* success */
|
2006-01-12 23:04:02 +01:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/*
|
2009-06-11 16:49:15 +02:00
|
|
|
* If we're not in a live transaction, the only thing we can do is
|
2014-05-06 18:12:18 +02:00
|
|
|
* restore a previous setting using the cache. This covers all
|
|
|
|
* transaction-rollback cases. The only case it might not work for is
|
2009-06-11 16:49:15 +02:00
|
|
|
* trying to change client_encoding on the fly by editing
|
2009-04-02 19:30:53 +02:00
|
|
|
* postgresql.conf and SIGHUP'ing. Which would probably be a stupid
|
|
|
|
* thing to do anyway.
|
2006-01-12 23:04:02 +01:00
|
|
|
*/
|
2009-04-02 19:30:53 +02:00
|
|
|
foreach(lc, ConvProcList)
|
|
|
|
{
|
|
|
|
ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
|
2002-08-14 07:33:34 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
if (oldinfo->s_encoding == current_server_encoding &&
|
|
|
|
oldinfo->c_encoding == encoding)
|
|
|
|
return 0;
|
|
|
|
}
|
2003-04-27 19:31:25 +02:00
|
|
|
|
2009-04-02 19:30:53 +02:00
|
|
|
return -1; /* it's not cached, so fail */
|
|
|
|
}
|
2001-08-15 09:07:40 +02:00
|
|
|
}
|
|
|
|
|
2003-04-27 19:31:25 +02:00
|
|
|
/*
|
2011-04-07 06:11:01 +02:00
|
|
|
* Set the active client encoding and set up the conversion-function pointers.
|
|
|
|
* PrepareClientEncoding should have been called previously for this encoding.
|
|
|
|
*
|
|
|
|
* Returns 0 if okay, -1 if not (bad encoding or can't support conversion)
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
SetClientEncoding(int encoding)
|
|
|
|
{
|
|
|
|
int current_server_encoding;
|
|
|
|
bool found;
|
|
|
|
ListCell *lc;
|
2011-04-17 19:36:38 +02:00
|
|
|
ListCell *prev;
|
|
|
|
ListCell *next;
|
2011-04-07 06:11:01 +02:00
|
|
|
|
|
|
|
if (!PG_VALID_FE_ENCODING(encoding))
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
/* Can't do anything during startup, per notes above */
|
|
|
|
if (!backend_startup_complete)
|
|
|
|
{
|
|
|
|
pending_client_encoding = encoding;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
current_server_encoding = GetDatabaseEncoding();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for cases that require no conversion function.
|
|
|
|
*/
|
|
|
|
if (current_server_encoding == encoding ||
|
|
|
|
current_server_encoding == PG_SQL_ASCII ||
|
|
|
|
encoding == PG_SQL_ASCII)
|
|
|
|
{
|
|
|
|
ClientEncoding = &pg_enc2name_tbl[encoding];
|
|
|
|
ToServerConvProc = NULL;
|
|
|
|
ToClientConvProc = NULL;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Search the cache for the entry previously prepared by
|
|
|
|
* PrepareClientEncoding; if there isn't one, we lose. While at it,
|
2011-04-10 17:42:00 +02:00
|
|
|
* release any duplicate entries so that repeated Prepare/Set cycles don't
|
|
|
|
* leak memory.
|
2011-04-07 06:11:01 +02:00
|
|
|
*/
|
|
|
|
found = false;
|
2011-04-17 19:36:38 +02:00
|
|
|
prev = NULL;
|
|
|
|
for (lc = list_head(ConvProcList); lc; lc = next)
|
2011-04-07 06:11:01 +02:00
|
|
|
{
|
|
|
|
ConvProcInfo *convinfo = (ConvProcInfo *) lfirst(lc);
|
|
|
|
|
2011-04-17 19:36:38 +02:00
|
|
|
next = lnext(lc);
|
|
|
|
|
2011-04-07 06:11:01 +02:00
|
|
|
if (convinfo->s_encoding == current_server_encoding &&
|
|
|
|
convinfo->c_encoding == encoding)
|
|
|
|
{
|
|
|
|
if (!found)
|
|
|
|
{
|
|
|
|
/* Found newest entry, so set up */
|
|
|
|
ClientEncoding = &pg_enc2name_tbl[encoding];
|
|
|
|
ToServerConvProc = &convinfo->to_server_info;
|
|
|
|
ToClientConvProc = &convinfo->to_client_info;
|
|
|
|
found = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Duplicate entry, release it */
|
2011-04-17 19:36:38 +02:00
|
|
|
ConvProcList = list_delete_cell(ConvProcList, lc, prev);
|
2011-04-07 06:11:01 +02:00
|
|
|
pfree(convinfo);
|
2011-04-17 19:36:38 +02:00
|
|
|
continue; /* prev mustn't advance */
|
2011-04-07 06:11:01 +02:00
|
|
|
}
|
|
|
|
}
|
2011-04-17 19:36:38 +02:00
|
|
|
|
|
|
|
prev = lc;
|
2011-04-07 06:11:01 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (found)
|
|
|
|
return 0; /* success */
|
|
|
|
else
|
|
|
|
return -1; /* it's not cached, so fail */
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize client encoding conversions.
|
|
|
|
* Called from InitPostgres() once during backend startup.
|
2003-02-19 15:31:26 +01:00
|
|
|
*/
|
|
|
|
void
|
2003-04-27 19:31:25 +02:00
|
|
|
InitializeClientEncoding(void)
|
2003-02-19 15:31:26 +01:00
|
|
|
{
|
2003-04-27 19:31:25 +02:00
|
|
|
Assert(!backend_startup_complete);
|
|
|
|
backend_startup_complete = true;
|
|
|
|
|
2011-04-07 06:11:01 +02:00
|
|
|
if (PrepareClientEncoding(pending_client_encoding) < 0 ||
|
|
|
|
SetClientEncoding(pending_client_encoding) < 0)
|
2003-02-19 15:31:26 +01:00
|
|
|
{
|
2003-04-27 19:31:25 +02:00
|
|
|
/*
|
2005-10-15 04:49:52 +02:00
|
|
|
* Oops, the requested conversion is not available. We couldn't fail
|
|
|
|
* before, but we can now.
|
2003-04-27 19:31:25 +02:00
|
|
|
*/
|
2003-07-25 22:18:01 +02:00
|
|
|
ereport(FATAL,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("conversion between %s and %s is not supported",
|
|
|
|
pg_enc2name_tbl[pending_client_encoding].name,
|
|
|
|
GetDatabaseEncodingName())));
|
2003-02-19 15:31:26 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1998-07-24 05:32:46 +02:00
|
|
|
/*
|
2009-04-02 19:30:53 +02:00
|
|
|
* returns the current client encoding
|
|
|
|
*/
|
1998-09-01 06:40:42 +02:00
|
|
|
int
|
2001-09-21 17:27:38 +02:00
|
|
|
pg_get_client_encoding(void)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2006-01-11 09:43:13 +01:00
|
|
|
return ClientEncoding->encoding;
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* returns the current client encoding name
|
|
|
|
*/
|
|
|
|
const char *
|
2001-09-21 17:27:38 +02:00
|
|
|
pg_get_client_encoding_name(void)
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
{
|
2006-01-11 09:43:13 +01:00
|
|
|
return ClientEncoding->name;
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert src string to another encoding (general case).
|
1999-09-12 00:28:11 +02:00
|
|
|
*
|
2014-02-23 21:22:50 +01:00
|
|
|
* See the notes about string conversion functions at the top of this file.
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
*/
|
1998-09-01 06:40:42 +02:00
|
|
|
unsigned char *
|
2001-09-21 17:27:38 +02:00
|
|
|
pg_do_encoding_conversion(unsigned char *src, int len,
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
int src_encoding, int dest_encoding)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
unsigned char *result;
|
2002-09-04 22:31:48 +02:00
|
|
|
Oid proc;
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
if (len <= 0)
|
|
|
|
return src; /* empty string is always valid */
|
2002-09-04 22:31:48 +02:00
|
|
|
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
if (src_encoding == dest_encoding)
|
2014-02-23 21:22:50 +01:00
|
|
|
return src; /* no conversion required, assume valid */
|
1998-09-01 06:40:42 +02:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
if (dest_encoding == PG_SQL_ASCII)
|
|
|
|
return src; /* any string is valid in SQL_ASCII */
|
2002-07-25 12:07:13 +02:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
if (src_encoding == PG_SQL_ASCII)
|
|
|
|
{
|
|
|
|
/* No conversion is possible, but we must validate the result */
|
|
|
|
(void) pg_verify_mbstr(dest_encoding, (const char *) src, len, false);
|
2002-11-26 03:22:29 +01:00
|
|
|
return src;
|
2014-02-23 21:22:50 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!IsTransactionState()) /* shouldn't happen */
|
|
|
|
elog(ERROR, "cannot perform encoding conversion outside a transaction");
|
2002-11-26 03:22:29 +01:00
|
|
|
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
proc = FindDefaultConversionProc(src_encoding, dest_encoding);
|
|
|
|
if (!OidIsValid(proc))
|
2014-02-23 21:22:50 +01:00
|
|
|
ereport(ERROR,
|
2003-07-25 22:18:01 +02:00
|
|
|
(errcode(ERRCODE_UNDEFINED_FUNCTION),
|
2004-08-29 07:07:03 +02:00
|
|
|
errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
|
|
|
|
pg_encoding_to_char(src_encoding),
|
|
|
|
pg_encoding_to_char(dest_encoding))));
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
|
2007-05-28 18:43:24 +02:00
|
|
|
/*
|
|
|
|
* Allocate space for conversion result, being wary of integer overflow
|
|
|
|
*/
|
|
|
|
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
|
|
errmsg("out of memory"),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errdetail("String of %d bytes is too long for encoding conversion.",
|
|
|
|
len)));
|
2007-05-28 18:43:24 +02:00
|
|
|
|
|
|
|
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
|
I have committed many support files for CREATE CONVERSION. Default
conversion procs and conversions are added in initdb. Currently
supported conversions are:
UTF-8(UNICODE) <--> SQL_ASCII, ISO-8859-1 to 16, EUC_JP, EUC_KR,
EUC_CN, EUC_TW, SJIS, BIG5, GBK, GB18030, UHC,
JOHAB, TCVN
EUC_JP <--> SJIS
EUC_TW <--> BIG5
MULE_INTERNAL <--> EUC_JP, SJIS, EUC_TW, BIG5
Note that initial contents of pg_conversion system catalog are created
in the initdb process. So doing initdb required is ideal, it's
possible to add them to your databases by hand, however. To accomplish
this:
psql -f your_postgresql_install_path/share/conversion_create.sql your_database
So I did not bump up the version in cataversion.h.
TODO:
Add more conversion procs
Add [CASCADE|RESTRICT] to DROP CONVERSION
Add tuples to pg_depend
Add regression tests
Write docs
Add SQL99 CONVERT command?
--
Tatsuo Ishii
2002-07-18 04:02:30 +02:00
|
|
|
|
|
|
|
OidFunctionCall5(proc,
|
|
|
|
Int32GetDatum(src_encoding),
|
|
|
|
Int32GetDatum(dest_encoding),
|
|
|
|
CStringGetDatum(src),
|
|
|
|
CStringGetDatum(result),
|
|
|
|
Int32GetDatum(len));
|
1999-09-12 00:28:11 +02:00
|
|
|
return result;
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2001-08-15 09:07:40 +02:00
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert string to encoding encoding_name. The source
|
2007-09-18 19:41:17 +02:00
|
|
|
* encoding is the DB encoding.
|
2001-08-15 09:07:40 +02:00
|
|
|
*
|
2007-09-18 19:41:17 +02:00
|
|
|
* BYTEA convert_to(TEXT string, NAME encoding_name) */
|
2001-08-15 09:07:40 +02:00
|
|
|
Datum
|
2007-09-18 19:41:17 +02:00
|
|
|
pg_convert_to(PG_FUNCTION_ARGS)
|
2001-08-15 09:07:40 +02:00
|
|
|
{
|
2002-09-04 22:31:48 +02:00
|
|
|
Datum string = PG_GETARG_DATUM(0);
|
|
|
|
Datum dest_encoding_name = PG_GETARG_DATUM(1);
|
2008-01-10 00:43:54 +01:00
|
|
|
Datum src_encoding_name = DirectFunctionCall1(namein,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
CStringGetDatum(DatabaseEncoding->name));
|
2002-09-04 22:31:48 +02:00
|
|
|
Datum result;
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
/*
|
|
|
|
* pg_convert expects a bytea as its first argument. We're passing it a
|
|
|
|
* text argument here, relying on the fact that they are both in fact
|
2007-09-24 18:38:24 +02:00
|
|
|
* varlena types, and thus structurally identical.
|
|
|
|
*/
|
2008-01-10 00:43:54 +01:00
|
|
|
result = DirectFunctionCall3(pg_convert, string,
|
|
|
|
src_encoding_name, dest_encoding_name);
|
2007-09-18 19:41:17 +02:00
|
|
|
|
2008-04-13 01:21:04 +02:00
|
|
|
PG_RETURN_DATUM(result);
|
2007-09-18 19:41:17 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert string from encoding encoding_name. The destination
|
2007-09-18 19:41:17 +02:00
|
|
|
* encoding is the DB encoding.
|
|
|
|
*
|
|
|
|
* TEXT convert_from(BYTEA string, NAME encoding_name) */
|
|
|
|
Datum
|
|
|
|
pg_convert_from(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
Datum string = PG_GETARG_DATUM(0);
|
|
|
|
Datum src_encoding_name = PG_GETARG_DATUM(1);
|
2008-01-10 00:43:54 +01:00
|
|
|
Datum dest_encoding_name = DirectFunctionCall1(namein,
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
CStringGetDatum(DatabaseEncoding->name));
|
2007-09-18 19:41:17 +02:00
|
|
|
Datum result;
|
|
|
|
|
2008-01-10 00:43:54 +01:00
|
|
|
result = DirectFunctionCall3(pg_convert, string,
|
|
|
|
src_encoding_name, dest_encoding_name);
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2007-11-15 22:14:46 +01:00
|
|
|
/*
|
|
|
|
* pg_convert returns a bytea, which we in turn return as text, relying on
|
|
|
|
* the fact that they are both in fact varlena types, and thus
|
2007-09-24 18:38:24 +02:00
|
|
|
* structurally identical. Although not all bytea values are valid text,
|
|
|
|
* in this case it will be because we've told pg_convert to return one
|
|
|
|
* that is valid as text in the current database encoding.
|
|
|
|
*/
|
2008-04-13 01:21:04 +02:00
|
|
|
PG_RETURN_DATUM(result);
|
2001-08-15 09:07:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert string between two arbitrary encodings.
|
2001-08-15 09:07:40 +02:00
|
|
|
*
|
2007-09-18 19:41:17 +02:00
|
|
|
* BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
|
2001-08-15 09:07:40 +02:00
|
|
|
*/
|
|
|
|
Datum
|
2007-09-18 19:41:17 +02:00
|
|
|
pg_convert(PG_FUNCTION_ARGS)
|
2001-08-15 09:07:40 +02:00
|
|
|
{
|
2010-12-03 04:00:27 +01:00
|
|
|
bytea *string = PG_GETARG_BYTEA_PP(0);
|
2001-10-25 07:50:21 +02:00
|
|
|
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
|
|
|
|
int src_encoding = pg_char_to_encoding(src_encoding_name);
|
|
|
|
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
|
|
|
|
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
|
2010-12-03 04:00:27 +01:00
|
|
|
const char *src_str;
|
|
|
|
char *dest_str;
|
2007-09-18 19:41:17 +02:00
|
|
|
bytea *retval;
|
2002-09-04 22:31:48 +02:00
|
|
|
int len;
|
2001-08-15 09:07:40 +02:00
|
|
|
|
|
|
|
if (src_encoding < 0)
|
2003-07-25 22:18:01 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid source encoding name \"%s\"",
|
|
|
|
src_encoding_name)));
|
2001-08-15 09:07:40 +02:00
|
|
|
if (dest_encoding < 0)
|
2003-07-25 22:18:01 +02:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid destination encoding name \"%s\"",
|
|
|
|
dest_encoding_name)));
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2010-12-03 04:00:27 +01:00
|
|
|
/* make sure that source string is valid */
|
|
|
|
len = VARSIZE_ANY_EXHDR(string);
|
|
|
|
src_str = VARDATA_ANY(string);
|
|
|
|
pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
2001-11-19 07:48:39 +01:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
/* perform conversion */
|
|
|
|
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) src_str,
|
|
|
|
len,
|
|
|
|
src_encoding,
|
|
|
|
dest_encoding);
|
|
|
|
|
|
|
|
/* update len if conversion actually happened */
|
2010-12-03 04:00:27 +01:00
|
|
|
if (dest_str != src_str)
|
|
|
|
len = strlen(dest_str);
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2002-09-04 22:31:48 +02:00
|
|
|
/*
|
2007-09-18 19:41:17 +02:00
|
|
|
* build bytea data type structure.
|
2002-09-04 22:31:48 +02:00
|
|
|
*/
|
2010-12-03 04:00:27 +01:00
|
|
|
retval = (bytea *) palloc(len + VARHDRSZ);
|
|
|
|
SET_VARSIZE(retval, len + VARHDRSZ);
|
|
|
|
memcpy(VARDATA(retval), dest_str, len);
|
|
|
|
|
|
|
|
if (dest_str != src_str)
|
|
|
|
pfree(dest_str);
|
2001-08-15 09:07:40 +02:00
|
|
|
|
|
|
|
/* free memory if allocated by the toaster */
|
|
|
|
PG_FREE_IF_COPY(string, 0);
|
|
|
|
|
2007-09-18 19:41:17 +02:00
|
|
|
PG_RETURN_BYTEA_P(retval);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* get the length of the string considered as text in the specified
|
|
|
|
* encoding. Raises an error if the data is not valid in that
|
|
|
|
* encoding.
|
|
|
|
*
|
|
|
|
* INT4 length (BYTEA string, NAME src_encoding_name)
|
|
|
|
*/
|
|
|
|
Datum
|
|
|
|
length_in_encoding(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2014-02-23 21:22:50 +01:00
|
|
|
bytea *string = PG_GETARG_BYTEA_PP(0);
|
2007-09-18 19:41:17 +02:00
|
|
|
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
|
|
|
|
int src_encoding = pg_char_to_encoding(src_encoding_name);
|
2014-02-23 21:22:50 +01:00
|
|
|
const char *src_str;
|
|
|
|
int len;
|
2007-11-15 22:14:46 +01:00
|
|
|
int retval;
|
2007-09-18 19:41:17 +02:00
|
|
|
|
2007-10-13 22:18:42 +02:00
|
|
|
if (src_encoding < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
|
|
errmsg("invalid encoding name \"%s\"",
|
|
|
|
src_encoding_name)));
|
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
len = VARSIZE_ANY_EXHDR(string);
|
|
|
|
src_str = VARDATA_ANY(string);
|
|
|
|
|
|
|
|
retval = pg_verify_mbstr_len(src_encoding, src_str, len, false);
|
2007-11-15 22:14:46 +01:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
PG_RETURN_INT32(retval);
|
2001-08-15 09:07:40 +02:00
|
|
|
}
|
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
/*
|
|
|
|
* Get maximum multibyte character length in the specified encoding.
|
|
|
|
*
|
|
|
|
* Note encoding is specified numerically, not by name as above.
|
|
|
|
*/
|
2009-07-07 20:23:15 +02:00
|
|
|
Datum
|
|
|
|
pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
|
|
|
|
{
|
2010-02-26 03:01:40 +01:00
|
|
|
int encoding = PG_GETARG_INT32(0);
|
2009-07-07 20:23:15 +02:00
|
|
|
|
|
|
|
if (PG_VALID_ENCODING(encoding))
|
2009-07-07 21:28:56 +02:00
|
|
|
PG_RETURN_INT32(pg_wchar_table[encoding].maxmblen);
|
2009-07-07 20:23:15 +02:00
|
|
|
else
|
|
|
|
PG_RETURN_NULL();
|
|
|
|
}
|
|
|
|
|
2001-08-15 09:07:40 +02:00
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert client encoding to server encoding.
|
|
|
|
*
|
|
|
|
* See the notes about string conversion functions at the top of this file.
|
2001-08-15 09:07:40 +02:00
|
|
|
*/
|
2005-09-24 19:53:28 +02:00
|
|
|
char *
|
|
|
|
pg_client_to_server(const char *s, int len)
|
2011-02-21 06:08:04 +01:00
|
|
|
{
|
|
|
|
return pg_any_to_server(s, len, ClientEncoding->encoding);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert any encoding to server encoding.
|
|
|
|
*
|
|
|
|
* See the notes about string conversion functions at the top of this file.
|
|
|
|
*
|
|
|
|
* Unlike the other string conversion functions, this will apply validation
|
2014-05-06 18:12:18 +02:00
|
|
|
* even if encoding == DatabaseEncoding->encoding. This is because this is
|
2014-02-23 21:22:50 +01:00
|
|
|
* used to process data coming in from outside the database, and we never
|
|
|
|
* want to just assume validity.
|
2011-02-21 06:08:04 +01:00
|
|
|
*/
|
|
|
|
char *
|
|
|
|
pg_any_to_server(const char *s, int len, int encoding)
|
2001-08-15 09:07:40 +02:00
|
|
|
{
|
2006-05-21 22:05:21 +02:00
|
|
|
if (len <= 0)
|
2014-02-23 21:22:50 +01:00
|
|
|
return (char *) s; /* empty string is always valid */
|
2006-05-21 22:05:21 +02:00
|
|
|
|
2011-02-21 06:08:04 +01:00
|
|
|
if (encoding == DatabaseEncoding->encoding ||
|
|
|
|
encoding == PG_SQL_ASCII)
|
2006-05-21 22:05:21 +02:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* No conversion is needed, but we must still validate the data.
|
|
|
|
*/
|
|
|
|
(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
|
|
|
|
return (char *) s;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (DatabaseEncoding->encoding == PG_SQL_ASCII)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* No conversion is possible, but we must still validate the data,
|
2006-10-04 02:30:14 +02:00
|
|
|
* because the client-side code might have done string escaping using
|
|
|
|
* the selected client_encoding. If the client encoding is ASCII-safe
|
|
|
|
* then we just do a straight validation under that encoding. For an
|
|
|
|
* ASCII-unsafe encoding we have a problem: we dare not pass such data
|
2014-05-06 18:12:18 +02:00
|
|
|
* to the parser but we have no way to convert it. We compromise by
|
2006-10-04 02:30:14 +02:00
|
|
|
* rejecting the data if it contains any non-ASCII characters.
|
2006-05-21 22:05:21 +02:00
|
|
|
*/
|
2011-02-21 06:08:04 +01:00
|
|
|
if (PG_VALID_BE_ENCODING(encoding))
|
|
|
|
(void) pg_verify_mbstr(encoding, s, len, false);
|
2006-05-21 22:05:21 +02:00
|
|
|
else
|
|
|
|
{
|
2006-10-04 02:30:14 +02:00
|
|
|
int i;
|
2006-05-21 22:05:21 +02:00
|
|
|
|
|
|
|
for (i = 0; i < len; i++)
|
|
|
|
{
|
|
|
|
if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errmsg("invalid byte value for encoding \"%s\": 0x%02x",
|
|
|
|
pg_enc2name_tbl[PG_SQL_ASCII].name,
|
|
|
|
(unsigned char) s[i])));
|
2006-05-21 22:05:21 +02:00
|
|
|
}
|
|
|
|
}
|
2005-09-24 19:53:28 +02:00
|
|
|
return (char *) s;
|
2006-05-21 22:05:21 +02:00
|
|
|
}
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
/* Fast path if we can use cached conversion function */
|
|
|
|
if (encoding == ClientEncoding->encoding)
|
2011-02-21 06:08:04 +01:00
|
|
|
return perform_default_encoding_conversion(s, len, true);
|
2014-02-23 21:22:50 +01:00
|
|
|
|
|
|
|
/* General case ... will not work outside transactions */
|
|
|
|
return (char *) pg_do_encoding_conversion((unsigned char *) s,
|
|
|
|
len,
|
|
|
|
encoding,
|
|
|
|
DatabaseEncoding->encoding);
|
2001-08-15 09:07:40 +02:00
|
|
|
}
|
|
|
|
|
1998-07-24 05:32:46 +02:00
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert server encoding to client encoding.
|
|
|
|
*
|
|
|
|
* See the notes about string conversion functions at the top of this file.
|
1998-07-24 05:32:46 +02:00
|
|
|
*/
|
2005-09-24 19:53:28 +02:00
|
|
|
char *
|
|
|
|
pg_server_to_client(const char *s, int len)
|
2011-02-21 06:08:04 +01:00
|
|
|
{
|
2011-02-21 08:26:58 +01:00
|
|
|
return pg_server_to_any(s, len, ClientEncoding->encoding);
|
2011-02-21 06:08:04 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2014-02-23 21:22:50 +01:00
|
|
|
* Convert server encoding to any encoding.
|
|
|
|
*
|
|
|
|
* See the notes about string conversion functions at the top of this file.
|
2011-02-21 06:08:04 +01:00
|
|
|
*/
|
|
|
|
char *
|
|
|
|
pg_server_to_any(const char *s, int len, int encoding)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2006-05-21 22:05:21 +02:00
|
|
|
if (len <= 0)
|
2014-02-23 21:22:50 +01:00
|
|
|
return (char *) s; /* empty string is always valid */
|
2001-08-15 09:07:40 +02:00
|
|
|
|
2011-02-21 06:08:04 +01:00
|
|
|
if (encoding == DatabaseEncoding->encoding ||
|
2014-02-23 21:22:50 +01:00
|
|
|
encoding == PG_SQL_ASCII)
|
2006-05-21 22:05:21 +02:00
|
|
|
return (char *) s; /* assume data is valid */
|
|
|
|
|
2014-02-23 21:22:50 +01:00
|
|
|
if (DatabaseEncoding->encoding == PG_SQL_ASCII)
|
|
|
|
{
|
|
|
|
/* No conversion is possible, but we must validate the result */
|
|
|
|
(void) pg_verify_mbstr(encoding, s, len, false);
|
|
|
|
return (char *) s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fast path if we can use cached conversion function */
|
|
|
|
if (encoding == ClientEncoding->encoding)
|
2011-02-21 06:08:04 +01:00
|
|
|
return perform_default_encoding_conversion(s, len, false);
|
2014-02-23 21:22:50 +01:00
|
|
|
|
|
|
|
/* General case ... will not work outside transactions */
|
|
|
|
return (char *) pg_do_encoding_conversion((unsigned char *) s,
|
|
|
|
len,
|
|
|
|
DatabaseEncoding->encoding,
|
|
|
|
encoding);
|
2002-08-08 08:35:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2002-09-04 22:31:48 +02:00
|
|
|
* Perform default encoding conversion using cached FmgrInfo. Since
|
|
|
|
* this function does not access database at all, it is safe to call
|
2009-04-02 19:30:53 +02:00
|
|
|
* outside transactions. If the conversion has not been set up by
|
|
|
|
* SetClientEncoding(), no conversion is performed.
|
|
|
|
*/
|
2005-09-24 19:53:28 +02:00
|
|
|
static char *
|
2014-02-23 21:22:50 +01:00
|
|
|
perform_default_encoding_conversion(const char *src, int len,
|
|
|
|
bool is_client_to_server)
|
2002-08-08 08:35:26 +02:00
|
|
|
{
|
2005-09-24 19:53:28 +02:00
|
|
|
char *result;
|
2002-09-04 22:31:48 +02:00
|
|
|
int src_encoding,
|
|
|
|
dest_encoding;
|
|
|
|
FmgrInfo *flinfo;
|
2002-11-26 03:22:29 +01:00
|
|
|
|
2002-08-08 08:35:26 +02:00
|
|
|
if (is_client_to_server)
|
|
|
|
{
|
|
|
|
src_encoding = ClientEncoding->encoding;
|
|
|
|
dest_encoding = DatabaseEncoding->encoding;
|
2002-11-02 19:41:22 +01:00
|
|
|
flinfo = ToServerConvProc;
|
2002-08-08 08:35:26 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
src_encoding = DatabaseEncoding->encoding;
|
|
|
|
dest_encoding = ClientEncoding->encoding;
|
2002-11-02 19:41:22 +01:00
|
|
|
flinfo = ToClientConvProc;
|
2002-08-08 08:35:26 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (flinfo == NULL)
|
2005-09-24 19:53:28 +02:00
|
|
|
return (char *) src;
|
2002-08-08 08:35:26 +02:00
|
|
|
|
2007-05-28 18:43:24 +02:00
|
|
|
/*
|
|
|
|
* Allocate space for conversion result, being wary of integer overflow
|
|
|
|
*/
|
|
|
|
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
|
|
errmsg("out of memory"),
|
Phase 3 of pgindent updates.
Don't move parenthesized lines to the left, even if that means they
flow past the right margin.
By default, BSD indent lines up statement continuation lines that are
within parentheses so that they start just to the right of the preceding
left parenthesis. However, traditionally, if that resulted in the
continuation line extending to the right of the desired right margin,
then indent would push it left just far enough to not overrun the margin,
if it could do so without making the continuation line start to the left of
the current statement indent. That makes for a weird mix of indentations
unless one has been completely rigid about never violating the 80-column
limit.
This behavior has been pretty universally panned by Postgres developers.
Hence, disable it with indent's new -lpl switch, so that parenthesized
lines are always lined up with the preceding left paren.
This patch is much less interesting than the first round of indent
changes, but also bulkier, so I thought it best to separate the effects.
Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org
Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:35:54 +02:00
|
|
|
errdetail("String of %d bytes is too long for encoding conversion.",
|
|
|
|
len)));
|
2007-05-28 18:43:24 +02:00
|
|
|
|
|
|
|
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
|
2002-08-08 08:35:26 +02:00
|
|
|
|
|
|
|
FunctionCall5(flinfo,
|
|
|
|
Int32GetDatum(src_encoding),
|
|
|
|
Int32GetDatum(dest_encoding),
|
|
|
|
CStringGetDatum(src),
|
|
|
|
CStringGetDatum(result),
|
|
|
|
Int32GetDatum(len));
|
|
|
|
return result;
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2008-06-18 20:42:54 +02:00
|
|
|
|
2002-09-03 23:45:44 +02:00
|
|
|
/* convert a multibyte string to a wchar */
|
2000-08-27 12:40:48 +02:00
|
|
|
int
|
2005-09-24 19:53:28 +02:00
|
|
|
pg_mb2wchar(const char *from, pg_wchar *to)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, strlen(from));
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2002-09-03 23:45:44 +02:00
|
|
|
/* convert a multibyte string to a wchar with a limited length */
|
2000-08-27 12:40:48 +02:00
|
|
|
int
|
2005-09-24 19:53:28 +02:00
|
|
|
pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2006-12-24 01:57:48 +01:00
|
|
|
/* same, with any encoding */
|
|
|
|
int
|
|
|
|
pg_encoding_mb2wchar_with_len(int encoding,
|
|
|
|
const char *from, pg_wchar *to, int len)
|
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[encoding].mb2wchar_with_len((const unsigned char *) from, to, len);
|
2006-12-24 01:57:48 +01:00
|
|
|
}
|
|
|
|
|
2012-07-04 23:10:10 +02:00
|
|
|
/* convert a wchar string to a multibyte */
|
|
|
|
int
|
|
|
|
pg_wchar2mb(const pg_wchar *from, char *to)
|
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, pg_wchar_strlen(from));
|
2012-07-04 23:10:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* convert a wchar string to a multibyte with a limited length */
|
|
|
|
int
|
|
|
|
pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
|
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
|
2012-07-04 23:10:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* same, with any encoding */
|
|
|
|
int
|
|
|
|
pg_encoding_wchar2mb_with_len(int encoding,
|
|
|
|
const pg_wchar *from, char *to, int len)
|
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len);
|
2012-07-04 23:10:10 +02:00
|
|
|
}
|
|
|
|
|
2009-01-04 19:37:36 +01:00
|
|
|
/* returns the byte length of a multibyte character */
|
1998-09-01 06:40:42 +02:00
|
|
|
int
|
2005-09-24 19:53:28 +02:00
|
|
|
pg_mblen(const char *mbstr)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr);
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2009-01-04 19:37:36 +01:00
|
|
|
/* returns the display length of a multibyte character */
|
2004-03-15 11:41:26 +01:00
|
|
|
int
|
2005-09-24 19:53:28 +02:00
|
|
|
pg_dsplen(const char *mbstr)
|
2004-03-15 11:41:26 +01:00
|
|
|
{
|
2017-09-07 18:06:23 +02:00
|
|
|
return pg_wchar_table[DatabaseEncoding->encoding].dsplen((const unsigned char *) mbstr);
|
2004-03-15 11:41:26 +01:00
|
|
|
}
|
|
|
|
|
2005-09-24 19:53:28 +02:00
|
|
|
/*
 * Count the characters (not bytes) in a NUL-terminated multibyte string
 * in the database encoding.
 */
int
pg_mbstrlen(const char *mbstr)
{
	const char *p;
	int			nchars;

	/* With a single-byte encoding, characters and bytes coincide. */
	if (pg_database_encoding_max_length() == 1)
		return strlen(mbstr);

	/* Otherwise hop from character to character until the terminator. */
	nchars = 0;
	for (p = mbstr; *p != '\0'; p += pg_mblen(p))
		nchars++;

	return nchars;
}
|
|
|
|
|
2005-09-24 19:53:28 +02:00
|
|
|
/*
 * Count the characters (not bytes) in the first "limit" bytes of a
 * multibyte string that need not be NUL-terminated.
 */
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
	int			nchars;

	/* With a single-byte encoding the byte count is returned directly. */
	if (pg_database_encoding_max_length() == 1)
		return limit;

	/* Walk one character at a time until the bytes run out or a NUL is hit. */
	for (nchars = 0; limit > 0 && *mbstr != '\0'; nchars++)
	{
		int			step = pg_mblen(mbstr);

		mbstr += step;
		limit -= step;
	}

	return nchars;
}
|
|
|
|
|
1998-09-25 03:46:25 +02:00
|
|
|
/*
|
2002-09-03 23:45:44 +02:00
|
|
|
* returns the byte length of a multibyte string
|
2009-01-04 19:37:36 +01:00
|
|
|
* (not necessarily NULL terminated)
|
2001-07-15 13:07:37 +02:00
|
|
|
* that is no longer than limit.
|
2009-01-04 19:37:36 +01:00
|
|
|
* this function does not break multibyte character boundary.
|
1998-09-25 03:46:25 +02:00
|
|
|
*/
|
|
|
|
int
|
2005-09-24 19:53:28 +02:00
|
|
|
pg_mbcliplen(const char *mbstr, int len, int limit)
|
1998-09-25 03:46:25 +02:00
|
|
|
{
|
2009-01-04 19:37:36 +01:00
|
|
|
return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
|
|
|
|
len, limit);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* pg_mbcliplen with specified encoding
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
pg_encoding_mbcliplen(int encoding, const char *mbstr,
|
|
|
|
int len, int limit)
|
|
|
|
{
|
|
|
|
mblen_converter mblen_fn;
|
1998-09-25 03:46:25 +02:00
|
|
|
int clen = 0;
|
|
|
|
int l;
|
|
|
|
|
2002-08-29 09:22:30 +02:00
|
|
|
/* optimization for single byte encoding */
|
2009-01-04 19:37:36 +01:00
|
|
|
if (pg_encoding_max_length(encoding) == 1)
|
2002-08-29 09:22:30 +02:00
|
|
|
return cliplen(mbstr, len, limit);
|
|
|
|
|
2009-01-04 19:37:36 +01:00
|
|
|
mblen_fn = pg_wchar_table[encoding].mblen;
|
|
|
|
|
2001-03-08 01:24:34 +01:00
|
|
|
while (len > 0 && *mbstr)
|
1998-09-25 03:46:25 +02:00
|
|
|
{
|
2009-01-04 19:37:36 +01:00
|
|
|
l = (*mblen_fn) ((const unsigned char *) mbstr);
|
1999-05-25 18:15:34 +02:00
|
|
|
if ((clen + l) > limit)
|
1998-09-25 03:46:25 +02:00
|
|
|
break;
|
|
|
|
clen += l;
|
1999-05-25 18:15:34 +02:00
|
|
|
if (clen == limit)
|
1998-09-25 03:46:25 +02:00
|
|
|
break;
|
|
|
|
len -= l;
|
|
|
|
mbstr += l;
|
|
|
|
}
|
2006-01-11 09:43:13 +01:00
|
|
|
return clen;
|
1998-09-25 03:46:25 +02:00
|
|
|
}
|
|
|
|
|
1998-07-24 05:32:46 +02:00
|
|
|
/*
 * Like pg_mbcliplen, except that "limit" is a number of characters rather
 * than a number of bytes.  The result is still a byte length.
 */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
	int			nbytes = 0;
	int			nchars = 0;

	/* With a single-byte encoding, characters and bytes coincide. */
	if (pg_database_encoding_max_length() == 1)
		return cliplen(mbstr, len, limit);

	while (len > 0 && *mbstr != '\0')
	{
		int			chlen = pg_mblen(mbstr);

		/* Stop once accepting this character would exceed the limit. */
		if (++nchars > limit)
			break;

		nbytes += chlen;
		mbstr += chlen;
		len -= chlen;
	}

	return nbytes;
}
|
|
|
|
|
2009-01-04 19:37:36 +01:00
|
|
|
/*
 * mbcliplen for any single-byte encoding: count bytes of str up to the
 * smaller of len and limit, stopping early at a NUL byte.
 */
static int
cliplen(const char *str, int len, int limit)
{
	int			bound = (len < limit) ? len : limit;
	int			n;

	for (n = 0; n < bound; n++)
	{
		if (str[n] == '\0')
			break;
	}

	return n;
}
|
|
|
|
|
1998-07-24 05:32:46 +02:00
|
|
|
void
|
|
|
|
SetDatabaseEncoding(int encoding)
|
|
|
|
{
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
if (!PG_VALID_BE_ENCODING(encoding))
|
2006-12-21 17:05:16 +01:00
|
|
|
elog(ERROR, "invalid database encoding: %d", encoding);
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
|
2001-10-25 07:50:21 +02:00
|
|
|
DatabaseEncoding = &pg_enc2name_tbl[encoding];
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
Assert(DatabaseEncoding->encoding == encoding);
|
2009-03-08 17:07:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
SetMessageEncoding(int encoding)
|
2009-03-08 17:07:12 +01:00
|
|
|
{
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
/* Some calls happen before we can elog()! */
|
|
|
|
Assert(PG_VALID_ENCODING(encoding));
|
2009-01-22 11:09:48 +01:00
|
|
|
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
MessageEncoding = &pg_enc2name_tbl[encoding];
|
|
|
|
Assert(MessageEncoding->encoding == encoding);
|
|
|
|
}
|
2009-04-08 11:50:48 +02:00
|
|
|
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
#ifdef ENABLE_NLS
|
|
|
|
/*
|
|
|
|
* Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
|
|
|
|
* codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
|
|
|
|
* fail for gettext-internal causes like out-of-memory.
|
|
|
|
*/
|
|
|
|
static bool
|
|
|
|
raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
|
|
|
|
{
|
|
|
|
bool elog_ok = (CurrentMemoryContext != NULL);
|
|
|
|
int i;
|
2009-04-08 11:50:48 +02:00
|
|
|
|
2009-04-24 10:43:51 +02:00
|
|
|
for (i = 0; pg_enc2gettext_tbl[i].name != NULL; i++)
|
2009-03-08 17:07:12 +01:00
|
|
|
{
|
2009-04-24 10:43:51 +02:00
|
|
|
if (pg_enc2gettext_tbl[i].encoding == encoding)
|
2009-01-22 11:09:48 +01:00
|
|
|
{
|
2009-03-08 17:07:12 +01:00
|
|
|
if (bind_textdomain_codeset(domainname,
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
pg_enc2gettext_tbl[i].name) != NULL)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
if (elog_ok)
|
2009-03-08 17:07:12 +01:00
|
|
|
elog(LOG, "bind_textdomain_codeset failed");
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
else
|
|
|
|
write_stderr("bind_textdomain_codeset failed");
|
|
|
|
|
2009-03-08 17:07:12 +01:00
|
|
|
break;
|
2009-01-22 11:09:48 +01:00
|
|
|
}
|
|
|
|
}
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Bind a gettext message domain to the codeset corresponding to the database
|
|
|
|
* encoding. For SQL_ASCII, instead bind to the codeset implied by LC_CTYPE.
|
|
|
|
* Return the MessageEncoding implied by the new settings.
|
|
|
|
*
|
|
|
|
* On most platforms, gettext defaults to the codeset implied by LC_CTYPE.
|
|
|
|
* When that matches the database encoding, we don't need to do anything. In
|
|
|
|
* CREATE DATABASE, we enforce or trust that the locale's codeset matches the
|
2014-05-06 18:12:18 +02:00
|
|
|
* database encoding, except for the C locale. (On Windows, we also permit a
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
* discrepancy under the UTF8 encoding.) For the C locale, explicitly bind
|
|
|
|
* gettext to the right codeset.
|
|
|
|
*
|
2014-05-06 18:12:18 +02:00
|
|
|
* On Windows, gettext defaults to the Windows ANSI code page. This is a
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
* convenient departure for software that passes the strings to Windows ANSI
|
|
|
|
* APIs, but we don't do that. Compel gettext to use database encoding or,
|
|
|
|
* failing that, the LC_CTYPE encoding as it would on other platforms.
|
|
|
|
*
|
|
|
|
* This function is called before elog() and palloc() are usable.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
pg_bind_textdomain_codeset(const char *domainname)
|
|
|
|
{
|
|
|
|
bool elog_ok = (CurrentMemoryContext != NULL);
|
|
|
|
int encoding = GetDatabaseEncoding();
|
|
|
|
int new_msgenc;
|
|
|
|
|
|
|
|
#ifndef WIN32
|
|
|
|
const char *ctype = setlocale(LC_CTYPE, NULL);
|
|
|
|
|
|
|
|
if (pg_strcasecmp(ctype, "C") == 0 || pg_strcasecmp(ctype, "POSIX") == 0)
|
2008-05-27 14:24:42 +02:00
|
|
|
#endif
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
if (encoding != PG_SQL_ASCII &&
|
|
|
|
raw_pg_bind_textdomain_codeset(domainname, encoding))
|
|
|
|
return encoding;
|
|
|
|
|
|
|
|
new_msgenc = pg_get_encoding_from_locale(NULL, elog_ok);
|
|
|
|
if (new_msgenc < 0)
|
|
|
|
new_msgenc = PG_SQL_ASCII;
|
|
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
if (!raw_pg_bind_textdomain_codeset(domainname, new_msgenc))
|
|
|
|
/* On failure, the old message encoding remains valid. */
|
|
|
|
return GetMessageEncoding();
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return new_msgenc;
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
#endif
|
1998-07-24 05:32:46 +02:00
|
|
|
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
/*
|
|
|
|
* The database encoding, also called the server encoding, represents the
|
|
|
|
* encoding of data stored in text-like data types. Affected types include
|
|
|
|
* cstring, text, varchar, name, xml, and json.
|
|
|
|
*/
|
1998-07-24 05:32:46 +02:00
|
|
|
int
|
2001-09-21 17:27:38 +02:00
|
|
|
GetDatabaseEncoding(void)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
2006-01-11 09:43:13 +01:00
|
|
|
return DatabaseEncoding->encoding;
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
2001-09-21 17:27:38 +02:00
|
|
|
GetDatabaseEncodingName(void)
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
{
|
2006-01-11 09:43:13 +01:00
|
|
|
return DatabaseEncoding->name;
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
|
|
|
|
2000-06-13 09:35:40 +02:00
|
|
|
Datum
|
|
|
|
getdatabaseencoding(PG_FUNCTION_ARGS)
|
1998-07-24 05:32:46 +02:00
|
|
|
{
|
Commit Karel's patch.
-------------------------------------------------------------------
Subject: Re: [PATCHES] encoding names
From: Karel Zak <zakkr@zf.jcu.cz>
To: Peter Eisentraut <peter_e@gmx.net>
Cc: pgsql-patches <pgsql-patches@postgresql.org>
Date: Fri, 31 Aug 2001 17:24:38 +0200
On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote:
> > - convert encoding 'name' to 'id'
>
> I thought we decided not to add functions returning "new" names until we
> know exactly what the new names should be, and pending schema
Ok, the patch not to add functions.
> better
>
> ...(): encoding name too long
Fixed.
I found new bug in command/variable.c in parse_client_encoding(), nobody
probably never see this error:
if (pg_set_client_encoding(encoding))
{
elog(ERROR, "Conversion between %s and %s is not supported",
value, GetDatabaseEncodingName());
}
because pg_set_client_encoding() returns -1 for error and 0 as true.
It's fixed too.
IMHO it can be apply.
Karel
PS:
* following files are renamed:
src/utils/mb/Unicode/KOI8_to_utf8.map -->
src/utils/mb/Unicode/koi8r_to_utf8.map
src/utils/mb/Unicode/WIN_to_utf8.map -->
src/utils/mb/Unicode/win1251_to_utf8.map
src/utils/mb/Unicode/utf8_to_KOI8.map -->
src/utils/mb/Unicode/utf8_to_koi8r.map
src/utils/mb/Unicode/utf8_to_WIN.map -->
src/utils/mb/Unicode/utf8_to_win1251.map
* new file:
src/utils/mb/encname.c
* removed file:
src/utils/mb/common.c
--
Karel Zak <zakkr@zf.jcu.cz>
http://home.zf.jcu.cz/~zakkr/
C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
|
|
|
return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
|
1998-07-24 05:32:46 +02:00
|
|
|
}
|
2001-10-12 04:08:34 +02:00
|
|
|
|
|
|
|
Datum
|
|
|
|
pg_client_encoding(PG_FUNCTION_ARGS)
|
|
|
|
{
|
|
|
|
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
|
|
|
|
}
|
2009-10-17 02:24:51 +02:00
|
|
|
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
/*
|
|
|
|
* gettext() returns messages in this encoding. This often matches the
|
|
|
|
* database encoding, but it differs for SQL_ASCII databases, for processes
|
|
|
|
* not attached to a database, and under a database encoding lacking iconv
|
|
|
|
* support (MULE_INTERNAL).
|
|
|
|
*/
|
2009-10-17 02:24:51 +02:00
|
|
|
int
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
GetMessageEncoding(void)
|
2009-10-17 02:24:51 +02:00
|
|
|
{
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
return MessageEncoding->encoding;
|
2009-10-17 02:24:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
/*
|
2017-11-12 22:03:15 +01:00
|
|
|
* Convert from MessageEncoding to a palloc'ed, null-terminated utf16
|
|
|
|
* string. The character length is also passed to utf16len if not
|
|
|
|
* null. Returns NULL iff failed. Before MessageEncoding initialization, "str"
|
|
|
|
* should be ASCII-only; this will function as though MessageEncoding is UTF8.
|
2009-10-17 02:24:51 +02:00
|
|
|
*/
|
|
|
|
WCHAR *
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
pgwin32_message_to_UTF16(const char *str, int len, int *utf16len)
|
2009-10-17 02:24:51 +02:00
|
|
|
{
|
|
|
|
WCHAR *utf16;
|
|
|
|
int dstlen;
|
|
|
|
UINT codepage;
|
|
|
|
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
codepage = pg_enc2name_tbl[GetMessageEncoding()].codepage;
|
2009-10-17 02:24:51 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Use MultiByteToWideChar directly if there is a corresponding codepage,
|
2015-08-15 02:23:09 +02:00
|
|
|
* or double conversion through UTF8 if not. Double conversion is needed,
|
|
|
|
* for example, in an ENCODING=LATIN8, LC_CTYPE=C database.
|
2009-10-17 02:24:51 +02:00
|
|
|
*/
|
|
|
|
if (codepage != 0)
|
|
|
|
{
|
|
|
|
utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
|
|
|
|
dstlen = MultiByteToWideChar(codepage, 0, str, len, utf16, len);
|
2010-07-07 17:13:21 +02:00
|
|
|
utf16[dstlen] = (WCHAR) 0;
|
2009-10-17 02:24:51 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
char *utf8;
|
|
|
|
|
2015-08-15 02:23:09 +02:00
|
|
|
/*
|
|
|
|
* XXX pg_do_encoding_conversion() requires a transaction. In the
|
|
|
|
* absence of one, hope for the input to be valid UTF8.
|
|
|
|
*/
|
|
|
|
if (IsTransactionState())
|
|
|
|
{
|
|
|
|
utf8 = (char *) pg_do_encoding_conversion((unsigned char *) str,
|
|
|
|
len,
|
|
|
|
GetMessageEncoding(),
|
|
|
|
PG_UTF8);
|
|
|
|
if (utf8 != str)
|
|
|
|
len = strlen(utf8);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
utf8 = (char *) str;
|
2009-10-17 02:24:51 +02:00
|
|
|
|
|
|
|
utf16 = (WCHAR *) palloc(sizeof(WCHAR) * (len + 1));
|
|
|
|
dstlen = MultiByteToWideChar(CP_UTF8, 0, utf8, len, utf16, len);
|
2010-07-07 17:13:21 +02:00
|
|
|
utf16[dstlen] = (WCHAR) 0;
|
2009-10-17 02:24:51 +02:00
|
|
|
|
|
|
|
if (utf8 != str)
|
|
|
|
pfree(utf8);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (dstlen == 0 && len > 0)
|
|
|
|
{
|
|
|
|
pfree(utf16);
|
2010-02-26 03:01:40 +01:00
|
|
|
return NULL; /* error */
|
2009-10-17 02:24:51 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (utf16len)
|
2009-10-17 07:14:52 +02:00
|
|
|
*utf16len = dstlen;
|
2009-10-17 02:24:51 +02:00
|
|
|
return utf16;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|