postgresql/src/backend/utils/mb/mbutils.c

1012 lines
26 KiB
C
Raw Normal View History

/*
* This file contains public functions for conversion between
* client encoding and server (database) encoding.
*
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.85 2009/04/08 09:50:48 heikki Exp $
*/
1999-07-17 18:25:28 +02:00
#include "postgres.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"
/*
* When converting strings between different encodings, we assume that space
* for converted result is 4-to-1 growth in the worst case. The rate for
* currently supported encoding pairs are within 3 (SJIS JIS X0201 half width
* kanna -> UTF8 is the worst case). So "4" should be enough for the moment.
*
* Note that this is not the same as the maximum character width in any
* particular encoding.
*/
#define MAX_CONVERSION_GROWTH 4
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
/*
* We maintain a simple linked list caching the fmgr lookup info for the
* currently selected conversion functions, as well as any that have been
* selected previously in the current session. (We remember previous
* settings because we must be able to restore a previous setting during
* transaction rollback, without doing any fresh catalog accesses.)
*
* Since we'll never release this data, we just keep it in TopMemoryContext.
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
*/
typedef struct ConvProcInfo
{
int s_encoding; /* server and client encoding IDs */
int c_encoding;
FmgrInfo to_server_info; /* lookup info for conversion procs */
FmgrInfo to_client_info;
} ConvProcInfo;
static List *ConvProcList = NIL; /* List of ConvProcInfo */
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
/*
* These variables point to the currently active conversion functions,
* or are NULL when no conversion is needed.
*/
static FmgrInfo *ToServerConvProc = NULL;
static FmgrInfo *ToClientConvProc = NULL;
/*
* These variables track the currently selected FE and BE encodings.
*/
static pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
static pg_enc2name *DatabaseEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
/*
* During backend startup we can't set client encoding because we (a)
* can't look up the conversion functions, and (b) may not know the database
* encoding yet either. So SetClientEncoding() just accepts anything and
* remembers it for InitializeClientEncoding() to apply later.
*/
static bool backend_startup_complete = false;
static int pending_client_encoding = PG_SQL_ASCII;
/* Internal functions */
static char *perform_default_encoding_conversion(const char *src,
int len, bool is_client_to_server);
static int cliplen(const char *str, int len, int limit);
/*
* Set the client encoding and save fmgrinfo for the conversion
* function if necessary. Returns 0 if okay, -1 if not (bad encoding
* or can't support conversion)
*/
int
SetClientEncoding(int encoding, bool doit)
{
int current_server_encoding;
ListCell *lc;
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
if (!PG_VALID_FE_ENCODING(encoding))
return -1;
/* Can't do anything during startup, per notes above */
if (!backend_startup_complete)
{
if (doit)
pending_client_encoding = encoding;
return 0;
}
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
current_server_encoding = GetDatabaseEncoding();
2002-09-04 22:31:48 +02:00
/*
* Check for cases that require no conversion function.
*/
if (current_server_encoding == encoding ||
current_server_encoding == PG_SQL_ASCII ||
encoding == PG_SQL_ASCII)
{
if (doit)
{
ClientEncoding = &pg_enc2name_tbl[encoding];
ToServerConvProc = NULL;
ToClientConvProc = NULL;
}
return 0;
}
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
if (IsTransactionState())
{
/*
* If we're in a live transaction, it's safe to access the catalogs,
* so look up the functions. We repeat the lookup even if the info
* is already cached, so that we can react to changes in the contents
* of pg_conversion.
*/
Oid to_server_proc,
to_client_proc;
ConvProcInfo *convinfo;
MemoryContext oldcontext;
to_server_proc = FindDefaultConversionProc(encoding,
current_server_encoding);
if (!OidIsValid(to_server_proc))
return -1;
to_client_proc = FindDefaultConversionProc(current_server_encoding,
encoding);
if (!OidIsValid(to_client_proc))
return -1;
/*
* Done if not wanting to actually apply setting.
*/
if (!doit)
return 0;
/*
* Load the fmgr info into TopMemoryContext (could still fail here)
*/
convinfo = (ConvProcInfo *) MemoryContextAlloc(TopMemoryContext,
sizeof(ConvProcInfo));
convinfo->s_encoding = current_server_encoding;
convinfo->c_encoding = encoding;
fmgr_info_cxt(to_server_proc, &convinfo->to_server_info,
TopMemoryContext);
fmgr_info_cxt(to_client_proc, &convinfo->to_client_info,
TopMemoryContext);
/* Attach new info to head of list */
oldcontext = MemoryContextSwitchTo(TopMemoryContext);
ConvProcList = lcons(convinfo, ConvProcList);
MemoryContextSwitchTo(oldcontext);
/*
* Everything is okay, so apply the setting.
*/
ClientEncoding = &pg_enc2name_tbl[encoding];
ToServerConvProc = &convinfo->to_server_info;
ToClientConvProc = &convinfo->to_client_info;
/*
* Remove any older entry for the same encoding pair (this is just
* to avoid memory leakage).
*/
foreach(lc, ConvProcList)
{
ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
if (oldinfo == convinfo)
continue;
if (oldinfo->s_encoding == convinfo->s_encoding &&
oldinfo->c_encoding == convinfo->c_encoding)
{
ConvProcList = list_delete_ptr(ConvProcList, oldinfo);
pfree(oldinfo);
break; /* need not look further */
}
}
return 0; /* success */
}
else
{
/*
* If we're not in a live transaction, the only thing we can do
* is restore a previous setting using the cache. This covers all
* transaction-rollback cases. The only case it might not work for
* is trying to change client_encoding on the fly by editing
* postgresql.conf and SIGHUP'ing. Which would probably be a stupid
* thing to do anyway.
*/
foreach(lc, ConvProcList)
{
ConvProcInfo *oldinfo = (ConvProcInfo *) lfirst(lc);
if (oldinfo->s_encoding == current_server_encoding &&
oldinfo->c_encoding == encoding)
{
if (doit)
{
ClientEncoding = &pg_enc2name_tbl[encoding];
ToServerConvProc = &oldinfo->to_server_info;
ToClientConvProc = &oldinfo->to_client_info;
}
return 0;
}
}
return -1; /* it's not cached, so fail */
}
}
/*
* Initialize client encoding if necessary.
* called from InitPostgres() once during backend startup.
*/
void
InitializeClientEncoding(void)
{
Assert(!backend_startup_complete);
backend_startup_complete = true;
if (SetClientEncoding(pending_client_encoding, true) < 0)
{
/*
2005-10-15 04:49:52 +02:00
* Oops, the requested conversion is not available. We couldn't fail
* before, but we can now.
*/
ereport(FATAL,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("conversion between %s and %s is not supported",
pg_enc2name_tbl[pending_client_encoding].name,
GetDatabaseEncodingName())));
}
}
/*
* returns the current client encoding
*/
int
pg_get_client_encoding(void)
{
2001-09-09 03:15:11 +02:00
Assert(ClientEncoding);
return ClientEncoding->encoding;
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
}
/*
* returns the current client encoding name
*/
const char *
pg_get_client_encoding_name(void)
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
{
2001-09-09 03:15:11 +02:00
Assert(ClientEncoding);
return ClientEncoding->name;
}
/*
* Apply encoding conversion on src and return it. The encoding
* conversion function is chosen from the pg_conversion system catalog
* marked as "default". If it is not found in the schema search path,
* it's taken from pg_catalog schema. If it even is not in the schema,
* warn and return src.
*
* If conversion occurs, a palloc'd null-terminated string is returned.
* In the case of no conversion, src is returned.
*
* CAUTION: although the presence of a length argument means that callers
* can pass non-null-terminated strings, care is required because the same
* string will be passed back if no conversion occurs. Such callers *must*
* check whether result == src and handle that case differently.
*
* Note: we try to avoid raising error, since that could get us into
* infinite recursion when this function is invoked during error message
* sending. It should be OK to raise error for overlength strings though,
* since the recursion will come with a shorter message.
*/
unsigned char *
pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding, int dest_encoding)
{
unsigned char *result;
2002-09-04 22:31:48 +02:00
Oid proc;
if (!IsTransactionState())
return src;
2002-09-04 22:31:48 +02:00
if (src_encoding == dest_encoding)
return src;
if (src_encoding == PG_SQL_ASCII || dest_encoding == PG_SQL_ASCII)
return src;
if (len <= 0)
return src;
proc = FindDefaultConversionProc(src_encoding, dest_encoding);
if (!OidIsValid(proc))
{
ereport(LOG,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
2004-08-29 07:07:03 +02:00
errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
pg_encoding_to_char(src_encoding),
pg_encoding_to_char(dest_encoding))));
return src;
}
2002-09-04 22:31:48 +02:00
/*
2005-10-15 04:49:52 +02:00
* XXX we should avoid throwing errors in OidFunctionCall. Otherwise we
* are going into infinite loop! So we have to make sure that the
2002-09-04 22:31:48 +02:00
* function exists before calling OidFunctionCall.
*/
if (!SearchSysCacheExists(PROCOID,
2002-09-04 22:31:48 +02:00
ObjectIdGetDatum(proc),
0, 0, 0))
{
elog(LOG, "cache lookup failed for function %u", proc);
return src;
}
/*
* Allocate space for conversion result, being wary of integer overflow
*/
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("out of memory"),
2007-11-15 22:14:46 +01:00
errdetail("String of %d bytes is too long for encoding conversion.",
len)));
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
OidFunctionCall5(proc,
Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding),
CStringGetDatum(src),
CStringGetDatum(result),
Int32GetDatum(len));
return result;
}
/*
* Convert string using encoding_name. The source
* encoding is the DB encoding.
*
* BYTEA convert_to(TEXT string, NAME encoding_name) */
Datum
pg_convert_to(PG_FUNCTION_ARGS)
{
2002-09-04 22:31:48 +02:00
Datum string = PG_GETARG_DATUM(0);
Datum dest_encoding_name = PG_GETARG_DATUM(1);
Datum src_encoding_name = DirectFunctionCall1(namein,
CStringGetDatum(DatabaseEncoding->name));
2002-09-04 22:31:48 +02:00
Datum result;
2007-11-15 22:14:46 +01:00
/*
* pg_convert expects a bytea as its first argument. We're passing it a
* text argument here, relying on the fact that they are both in fact
* varlena types, and thus structurally identical.
*/
result = DirectFunctionCall3(pg_convert, string,
src_encoding_name, dest_encoding_name);
PG_RETURN_DATUM(result);
}
/*
* Convert string using encoding_name. The destination
* encoding is the DB encoding.
*
* TEXT convert_from(BYTEA string, NAME encoding_name) */
Datum
pg_convert_from(PG_FUNCTION_ARGS)
{
Datum string = PG_GETARG_DATUM(0);
Datum src_encoding_name = PG_GETARG_DATUM(1);
Datum dest_encoding_name = DirectFunctionCall1(namein,
CStringGetDatum(DatabaseEncoding->name));
Datum result;
result = DirectFunctionCall3(pg_convert, string,
src_encoding_name, dest_encoding_name);
2007-11-15 22:14:46 +01:00
/*
* pg_convert returns a bytea, which we in turn return as text, relying on
* the fact that they are both in fact varlena types, and thus
* structurally identical. Although not all bytea values are valid text,
* in this case it will be because we've told pg_convert to return one
* that is valid as text in the current database encoding.
*/
PG_RETURN_DATUM(result);
}
/*
* Convert string using encoding_names.
*
* BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
*/
Datum
pg_convert(PG_FUNCTION_ARGS)
{
bytea *string = PG_GETARG_BYTEA_P(0);
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
int src_encoding = pg_char_to_encoding(src_encoding_name);
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
unsigned char *result;
bytea *retval;
unsigned char *str;
2002-09-04 22:31:48 +02:00
int len;
if (src_encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid source encoding name \"%s\"",
src_encoding_name)));
if (dest_encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid destination encoding name \"%s\"",
dest_encoding_name)));
/* make sure that source string is valid and null terminated */
len = VARSIZE(string) - VARHDRSZ;
2007-11-15 22:14:46 +01:00
pg_verify_mbstr(src_encoding, VARDATA(string), len, false);
str = palloc(len + 1);
memcpy(str, VARDATA(string), len);
*(str + len) = '\0';
result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);
2002-09-04 22:31:48 +02:00
/*
* build bytea data type structure.
2002-09-04 22:31:48 +02:00
*/
len = strlen((char *) result) + VARHDRSZ;
retval = palloc(len);
SET_VARSIZE(retval, len);
memcpy(VARDATA(retval), result, len - VARHDRSZ);
if (result != str)
pfree(result);
2002-09-04 22:31:48 +02:00
pfree(str);
/* free memory if allocated by the toaster */
PG_FREE_IF_COPY(string, 0);
PG_RETURN_BYTEA_P(retval);
}
/*
* get the length of the string considered as text in the specified
* encoding. Raises an error if the data is not valid in that
* encoding.
*
* INT4 length (BYTEA string, NAME src_encoding_name)
*/
Datum
length_in_encoding(PG_FUNCTION_ARGS)
{
2007-11-15 22:14:46 +01:00
bytea *string = PG_GETARG_BYTEA_P(0);
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
int src_encoding = pg_char_to_encoding(src_encoding_name);
2007-11-15 22:14:46 +01:00
int len = VARSIZE(string) - VARHDRSZ;
int retval;
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
if (src_encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid encoding name \"%s\"",
src_encoding_name)));
retval = pg_verify_mbstr_len(src_encoding, VARDATA(string), len, false);
PG_RETURN_INT32(retval);
2007-11-15 22:14:46 +01:00
}
/*
* convert client encoding to server encoding.
*/
char *
pg_client_to_server(const char *s, int len)
{
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
Assert(DatabaseEncoding);
2001-09-09 03:15:11 +02:00
Assert(ClientEncoding);
if (len <= 0)
return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is needed, but we must still validate the data.
*/
(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
return (char *) s;
}
if (DatabaseEncoding->encoding == PG_SQL_ASCII)
{
/*
* No conversion is possible, but we must still validate the data,
2006-10-04 02:30:14 +02:00
* because the client-side code might have done string escaping using
* the selected client_encoding. If the client encoding is ASCII-safe
* then we just do a straight validation under that encoding. For an
* ASCII-unsafe encoding we have a problem: we dare not pass such data
* to the parser but we have no way to convert it. We compromise by
* rejecting the data if it contains any non-ASCII characters.
*/
if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
(void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
else
{
2006-10-04 02:30:14 +02:00
int i;
for (i = 0; i < len; i++)
{
if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
2006-10-04 02:30:14 +02:00
errmsg("invalid byte value for encoding \"%s\": 0x%02x",
pg_enc2name_tbl[PG_SQL_ASCII].name,
(unsigned char) s[i])));
}
}
return (char *) s;
}
return perform_default_encoding_conversion(s, len, true);
}
/*
* convert server encoding to client encoding.
*/
char *
pg_server_to_client(const char *s, int len)
{
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
Assert(DatabaseEncoding);
2001-09-09 03:15:11 +02:00
Assert(ClientEncoding);
if (len <= 0)
return (char *) s;
if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
ClientEncoding->encoding == PG_SQL_ASCII ||
DatabaseEncoding->encoding == PG_SQL_ASCII)
return (char *) s; /* assume data is valid */
return perform_default_encoding_conversion(s, len, false);
}
/*
2002-09-04 22:31:48 +02:00
* Perform default encoding conversion using cached FmgrInfo. Since
* this function does not access database at all, it is safe to call
* outside transactions. If the conversion has not been set up by
* SetClientEncoding(), no conversion is performed.
*/
static char *
perform_default_encoding_conversion(const char *src, int len, bool is_client_to_server)
{
char *result;
2002-09-04 22:31:48 +02:00
int src_encoding,
dest_encoding;
FmgrInfo *flinfo;
if (is_client_to_server)
{
src_encoding = ClientEncoding->encoding;
dest_encoding = DatabaseEncoding->encoding;
flinfo = ToServerConvProc;
}
else
{
src_encoding = DatabaseEncoding->encoding;
dest_encoding = ClientEncoding->encoding;
flinfo = ToClientConvProc;
}
if (flinfo == NULL)
return (char *) src;
/*
* Allocate space for conversion result, being wary of integer overflow
*/
if ((Size) len >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH))
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("out of memory"),
2007-11-15 22:14:46 +01:00
errdetail("String of %d bytes is too long for encoding conversion.",
len)));
result = palloc(len * MAX_CONVERSION_GROWTH + 1);
FunctionCall5(flinfo,
Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding),
CStringGetDatum(src),
CStringGetDatum(result),
Int32GetDatum(len));
return result;
}
#ifdef USE_WIDE_UPPER_LOWER
/*
* wchar2char --- convert wide characters to multibyte format
*
* This has the same API as the standard wcstombs() function; in particular,
* tolen is the maximum number of bytes to store at *to, and *from must be
* zero-terminated. The output will be zero-terminated iff there is room.
*/
size_t
wchar2char(char *to, const wchar_t *from, size_t tolen)
{
size_t result;
if (tolen == 0)
return 0;
#ifdef WIN32
/*
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
* and for some reason mbstowcs and wcstombs won't do this for us,
* so we use MultiByteToWideChar().
*/
if (GetDatabaseEncoding() == PG_UTF8)
{
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
NULL, NULL);
/* A zero return is failure */
if (result <= 0)
result = -1;
else
{
Assert(result <= tolen);
/* Microsoft counts the zero terminator in the result */
result--;
}
}
else
#endif /* WIN32 */
{
Assert( !lc_ctype_is_c() );
result = wcstombs(to, from, tolen);
}
return result;
}
/*
* char2wchar --- convert multibyte characters to wide characters
*
* This has almost the API of mbstowcs(), except that *from need not be
* null-terminated; instead, the number of input bytes is specified as
* fromlen. Also, we ereport() rather than returning -1 for invalid
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
* The output will be zero-terminated iff there is room.
*/
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
{
size_t result;
if (tolen == 0)
return 0;
#ifdef WIN32
/* See WIN32 "Unicode" comment above */
if (GetDatabaseEncoding() == PG_UTF8)
{
/* Win32 API does not work for zero-length input */
if (fromlen == 0)
result = 0;
else
{
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
/* A zero return is failure */
if (result == 0)
result = -1;
}
if (result != -1)
{
Assert(result < tolen);
/* Append trailing null wchar (MultiByteToWideChar() does not) */
to[result] = 0;
}
}
else
#endif /* WIN32 */
{
/* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen);
Assert( !lc_ctype_is_c() );
result = mbstowcs(to, str, tolen);
pfree(str);
}
if (result == -1)
{
/*
* Invalid multibyte character encountered. We try to give a useful
* error message by letting pg_verifymbstr check the string. But it's
* possible that the string is OK to us, and not OK to mbstowcs ---
* this suggests that the LC_CTYPE locale is different from the
* database encoding. Give a generic error message if verifymbstr
* can't find anything wrong.
*/
pg_verifymbstr(from, fromlen, false); /* might not return */
/* but if it does ... */
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte character for locale"),
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
}
return result;
}
#endif
/* convert a multibyte string to a wchar */
int
pg_mb2wchar(const char *from, pg_wchar *to)
{
return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, strlen(from));
}
/* convert a multibyte string to a wchar with a limited length */
int
pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len)
{
return (*pg_wchar_table[DatabaseEncoding->encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
}
/* same, with any encoding */
int
pg_encoding_mb2wchar_with_len(int encoding,
const char *from, pg_wchar *to, int len)
{
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
}
/* returns the byte length of a multibyte character */
int
pg_mblen(const char *mbstr)
{
return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
}
/* returns the display length of a multibyte character */
int
pg_dsplen(const char *mbstr)
{
return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) ((const unsigned char *) mbstr));
}
/* returns the length (counted in wchars) of a multibyte string */
int
pg_mbstrlen(const char *mbstr)
{
int len = 0;
/* optimization for single byte encoding */
if (pg_database_encoding_max_length() == 1)
return strlen(mbstr);
while (*mbstr)
{
mbstr += pg_mblen(mbstr);
len++;
}
return len;
}
/* returns the length (counted in wchars) of a multibyte string
* (not necessarily NULL terminated)
*/
int
pg_mbstrlen_with_len(const char *mbstr, int limit)
{
int len = 0;
/* optimization for single byte encoding */
if (pg_database_encoding_max_length() == 1)
return limit;
while (limit > 0 && *mbstr)
{
2005-10-15 04:49:52 +02:00
int l = pg_mblen(mbstr);
limit -= l;
mbstr += l;
len++;
}
return len;
}
1998-09-25 03:46:25 +02:00
/*
* returns the byte length of a multibyte string
* (not necessarily NULL terminated)
* that is no longer than limit.
* this function does not break multibyte character boundary.
1998-09-25 03:46:25 +02:00
*/
int
pg_mbcliplen(const char *mbstr, int len, int limit)
1998-09-25 03:46:25 +02:00
{
return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
len, limit);
}
/*
* pg_mbcliplen with specified encoding
*/
int
pg_encoding_mbcliplen(int encoding, const char *mbstr,
int len, int limit)
{
mblen_converter mblen_fn;
1998-09-25 03:46:25 +02:00
int clen = 0;
int l;
/* optimization for single byte encoding */
if (pg_encoding_max_length(encoding) == 1)
return cliplen(mbstr, len, limit);
mblen_fn = pg_wchar_table[encoding].mblen;
while (len > 0 && *mbstr)
1998-09-25 03:46:25 +02:00
{
l = (*mblen_fn) ((const unsigned char *) mbstr);
1999-05-25 18:15:34 +02:00
if ((clen + l) > limit)
1998-09-25 03:46:25 +02:00
break;
clen += l;
1999-05-25 18:15:34 +02:00
if (clen == limit)
1998-09-25 03:46:25 +02:00
break;
len -= l;
mbstr += l;
}
return clen;
1998-09-25 03:46:25 +02:00
}
/*
* Similar to pg_mbcliplen except the limit parameter specifies the
* character length, not the byte length.
*/
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
int clen = 0;
int nch = 0;
int l;
/* optimization for single byte encoding */
if (pg_database_encoding_max_length() == 1)
return cliplen(mbstr, len, limit);
while (len > 0 && *mbstr)
{
l = pg_mblen(mbstr);
nch++;
if (nch > limit)
break;
clen += l;
len -= l;
mbstr += l;
}
return clen;
}
/* mbcliplen for any single-byte encoding */
static int
cliplen(const char *str, int len, int limit)
{
int l = 0;
len = Min(len, limit);
while (l < len && str[l])
l++;
return l;
}
#if defined(ENABLE_NLS)
static const struct codeset_map {
int encoding;
const char *codeset;
} codeset_map_array[] = {
{PG_UTF8, "UTF-8"},
{PG_LATIN1, "LATIN1"},
{PG_LATIN2, "LATIN2"},
{PG_LATIN3, "LATIN3"},
{PG_LATIN4, "LATIN4"},
{PG_ISO_8859_5, "ISO-8859-5"},
{PG_ISO_8859_6, "ISO_8859-6"},
{PG_ISO_8859_7, "ISO-8859-7"},
{PG_ISO_8859_8, "ISO-8859-8"},
{PG_LATIN5, "LATIN5"},
{PG_LATIN6, "LATIN6"},
{PG_LATIN7, "LATIN7"},
{PG_LATIN8, "LATIN8"},
{PG_LATIN9, "LATIN-9"},
{PG_LATIN10, "LATIN10"},
{PG_KOI8R, "KOI8-R"},
{PG_KOI8U, "KOI8-U"},
{PG_WIN1250, "CP1250"},
{PG_WIN1251, "CP1251"},
{PG_WIN1252, "CP1252"},
{PG_WIN1253, "CP1253"},
{PG_WIN1254, "CP1254"},
{PG_WIN1255, "CP1255"},
{PG_WIN1256, "CP1256"},
{PG_WIN1257, "CP1257"},
{PG_WIN1258, "CP1258"},
{PG_WIN866, "CP866"},
{PG_WIN874, "CP874"},
{PG_EUC_CN, "EUC-CN"},
{PG_EUC_JP, "EUC-JP"},
{PG_EUC_KR, "EUC-KR"},
{PG_EUC_TW, "EUC-TW"},
{PG_EUC_JIS_2004, "EUC-JP"}
};
#endif /* ENABLE_NLS */
void
SetDatabaseEncoding(int encoding)
{
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
if (!PG_VALID_BE_ENCODING(encoding))
elog(ERROR, "invalid database encoding: %d", encoding);
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
DatabaseEncoding = &pg_enc2name_tbl[encoding];
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
Assert(DatabaseEncoding->encoding == encoding);
}
/*
* Bind gettext to the codeset equivalent with the database encoding.
*/
void
pg_bind_textdomain_codeset(const char *domainname)
{
#if defined(ENABLE_NLS)
int encoding = GetDatabaseEncoding();
int i;
/*
* gettext() uses the codeset specified by LC_CTYPE by default,
* so if that matches the database encoding we don't need to do
* anything. In CREATE DATABASE, we enforce or trust that the
* locale's codeset matches database encoding, except for the C
* locale. In C locale, we bind gettext() explicitly to the right
* codeset.
*
* On Windows, though, gettext() tends to get confused so we always
* bind it.
*/
#ifndef WIN32
const char *ctype = setlocale(LC_CTYPE, NULL);
if (pg_strcasecmp(ctype, "C") != 0 && pg_strcasecmp(ctype, "POSIX") != 0)
return;
#endif
for (i = 0; i < lengthof(codeset_map_array); i++)
{
if (codeset_map_array[i].encoding == encoding)
{
if (bind_textdomain_codeset(domainname,
codeset_map_array[i].codeset) == NULL)
elog(LOG, "bind_textdomain_codeset failed");
break;
}
}
#endif
}
int
GetDatabaseEncoding(void)
{
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
Assert(DatabaseEncoding);
return DatabaseEncoding->encoding;
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
}
const char *
GetDatabaseEncodingName(void)
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
{
Assert(DatabaseEncoding);
return DatabaseEncoding->name;
}
Datum
getdatabaseencoding(PG_FUNCTION_ARGS)
{
Commit Karel's patch. ------------------------------------------------------------------- Subject: Re: [PATCHES] encoding names From: Karel Zak <zakkr@zf.jcu.cz> To: Peter Eisentraut <peter_e@gmx.net> Cc: pgsql-patches <pgsql-patches@postgresql.org> Date: Fri, 31 Aug 2001 17:24:38 +0200 On Thu, Aug 30, 2001 at 01:30:40AM +0200, Peter Eisentraut wrote: > > - convert encoding 'name' to 'id' > > I thought we decided not to add functions returning "new" names until we > know exactly what the new names should be, and pending schema Ok, the patch not to add functions. > better > > ...(): encoding name too long Fixed. I found new bug in command/variable.c in parse_client_encoding(), nobody probably never see this error: if (pg_set_client_encoding(encoding)) { elog(ERROR, "Conversion between %s and %s is not supported", value, GetDatabaseEncodingName()); } because pg_set_client_encoding() returns -1 for error and 0 as true. It's fixed too. IMHO it can be apply. Karel PS: * following files are renamed: src/utils/mb/Unicode/KOI8_to_utf8.map --> src/utils/mb/Unicode/koi8r_to_utf8.map src/utils/mb/Unicode/WIN_to_utf8.map --> src/utils/mb/Unicode/win1251_to_utf8.map src/utils/mb/Unicode/utf8_to_KOI8.map --> src/utils/mb/Unicode/utf8_to_koi8r.map src/utils/mb/Unicode/utf8_to_WIN.map --> src/utils/mb/Unicode/utf8_to_win1251.map * new file: src/utils/mb/encname.c * removed file: src/utils/mb/common.c -- Karel Zak <zakkr@zf.jcu.cz> http://home.zf.jcu.cz/~zakkr/ C, PostgreSQL, PHP, WWW, http://docs.linux.cz, http://mape.jcu.cz
2001-09-06 06:57:30 +02:00
Assert(DatabaseEncoding);
return DirectFunctionCall1(namein, CStringGetDatum(DatabaseEncoding->name));
}
Datum
pg_client_encoding(PG_FUNCTION_ARGS)
{
Assert(ClientEncoding);
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
}