2020-01-16 21:58:24 +01:00
|
|
|
/*-------------------------------------------------------------------------
|
2001-09-07 05:32:11 +02:00
|
|
|
*
|
2020-01-16 21:58:24 +01:00
|
|
|
* encnames.c
|
|
|
|
* Encoding names and routines for working with them.
|
|
|
|
*
|
2022-01-08 01:04:57 +01:00
|
|
|
* Portions Copyright (c) 2001-2022, PostgreSQL Global Development Group
|
2020-01-16 21:58:24 +01:00
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/common/encnames.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
2001-09-07 05:32:11 +02:00
|
|
|
*/
|
2020-01-17 00:08:21 +01:00
|
|
|
#include "c.h"
|
2001-09-07 05:32:11 +02:00
|
|
|
|
2007-10-13 22:18:42 +02:00
|
|
|
#include <ctype.h>
|
2001-09-07 05:32:11 +02:00
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "mb/pg_wchar.h"
|
2007-10-13 22:18:42 +02:00
|
|
|
|
2001-09-07 05:32:11 +02:00
|
|
|
|
|
|
|
/* ----------
 * All encoding names, sorted:		 *** A L P H A B E T I C ***
 *
 * All names must be stored without irrelevant chars, because the search
 * routines compare isalnum() chars only.  This means that ISO-8859-1,
 * iso_8859-1 and Iso8859_1 are all converted to 'iso88591' before lookup.
 * All names must be lower case.
 *
 * The table doesn't contain 'cs' aliases (like csISOLatin1).  Are they
 * needed?
 *
 * Karel Zak, Aug 2001
 * ----------
 */
|
2014-01-18 22:04:11 +01:00
|
|
|
typedef struct pg_encname
|
|
|
|
{
|
|
|
|
const char *name;
|
|
|
|
pg_enc encoding;
|
|
|
|
} pg_encname;
|
|
|
|
|
|
|
|
static const pg_encname pg_encname_tbl[] =
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"abc", PG_WIN1258
|
|
|
|
}, /* alias for WIN1258 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"alt", PG_WIN866
|
2001-09-07 05:32:11 +02:00
|
|
|
}, /* IBM866 */
|
|
|
|
{
|
|
|
|
"big5", PG_BIG5
|
2002-09-03 23:45:44 +02:00
|
|
|
}, /* Big5; Chinese for Taiwan multibyte set */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"euccn", PG_EUC_CN
|
|
|
|
}, /* EUC-CN; Extended Unix Code for simplified
|
|
|
|
* Chinese */
|
2007-03-25 13:56:04 +02:00
|
|
|
{
|
|
|
|
"eucjis2004", PG_EUC_JIS_2004
|
|
|
|
}, /* EUC-JIS-2004; Extended UNIX Code fixed
|
|
|
|
* Width for Japanese, standard JIS X 0213 */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"eucjp", PG_EUC_JP
|
|
|
|
}, /* EUC-JP; Extended UNIX Code fixed Width for
|
2003-03-10 23:28:22 +01:00
|
|
|
* Japanese, standard OSF */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"euckr", PG_EUC_KR
|
2002-03-05 06:52:50 +01:00
|
|
|
}, /* EUC-KR; Extended Unix Code for Korean , KS
|
|
|
|
* X 1001 standard */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"euctw", PG_EUC_TW
|
|
|
|
}, /* EUC-TW; Extended Unix Code for
|
2002-09-04 22:31:48 +02:00
|
|
|
*
|
2001-10-16 12:09:17 +02:00
|
|
|
* traditional Chinese */
|
2002-06-13 10:30:22 +02:00
|
|
|
{
|
|
|
|
"gb18030", PG_GB18030
|
|
|
|
}, /* GB18030;GB18030 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"gbk", PG_GBK
|
|
|
|
}, /* GBK; Chinese Windows CodePage 936
|
|
|
|
* simplified Chinese */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"iso88591", PG_LATIN1
|
|
|
|
}, /* ISO-8859-1; RFC1345,KXS2 */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"iso885910", PG_LATIN6
|
|
|
|
}, /* ISO-8859-10; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso885913", PG_LATIN7
|
|
|
|
}, /* ISO-8859-13; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso885914", PG_LATIN8
|
|
|
|
}, /* ISO-8859-14; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso885915", PG_LATIN9
|
|
|
|
}, /* ISO-8859-15; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso885916", PG_LATIN10
|
|
|
|
}, /* ISO-8859-16; RFC1345,KXS2 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"iso88592", PG_LATIN2
|
|
|
|
}, /* ISO-8859-2; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88593", PG_LATIN3
|
|
|
|
}, /* ISO-8859-3; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88594", PG_LATIN4
|
|
|
|
}, /* ISO-8859-4; RFC1345,KXS2 */
|
2001-10-11 16:20:35 +02:00
|
|
|
{
|
|
|
|
"iso88595", PG_ISO_8859_5
|
|
|
|
}, /* ISO-8859-5; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88596", PG_ISO_8859_6
|
|
|
|
}, /* ISO-8859-6; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88597", PG_ISO_8859_7
|
|
|
|
}, /* ISO-8859-7; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88598", PG_ISO_8859_8
|
|
|
|
}, /* ISO-8859-8; RFC1345,KXS2 */
|
|
|
|
{
|
|
|
|
"iso88599", PG_LATIN5
|
|
|
|
}, /* ISO-8859-9; RFC1345,KXS2 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"johab", PG_JOHAB
|
|
|
|
}, /* JOHAB; Extended Unix Code for simplified
|
|
|
|
* Chinese */
|
2001-10-11 16:20:35 +02:00
|
|
|
{
|
|
|
|
"koi8", PG_KOI8R
|
|
|
|
}, /* _dirty_ alias for KOI8-R (backward
|
|
|
|
* compatibility) */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"koi8r", PG_KOI8R
|
|
|
|
}, /* KOI8-R; RFC1489 */
|
2009-02-10 20:29:39 +01:00
|
|
|
{
|
|
|
|
"koi8u", PG_KOI8U
|
|
|
|
}, /* KOI8-U; RFC2319 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"latin1", PG_LATIN1
|
|
|
|
}, /* alias for ISO-8859-1 */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"latin10", PG_LATIN10
|
|
|
|
}, /* alias for ISO-8859-16 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"latin2", PG_LATIN2
|
|
|
|
}, /* alias for ISO-8859-2 */
|
|
|
|
{
|
|
|
|
"latin3", PG_LATIN3
|
|
|
|
}, /* alias for ISO-8859-3 */
|
|
|
|
{
|
|
|
|
"latin4", PG_LATIN4
|
|
|
|
}, /* alias for ISO-8859-4 */
|
2001-10-11 16:20:35 +02:00
|
|
|
{
|
|
|
|
"latin5", PG_LATIN5
|
|
|
|
}, /* alias for ISO-8859-9 */
|
2001-10-16 12:09:17 +02:00
|
|
|
{
|
|
|
|
"latin6", PG_LATIN6
|
|
|
|
}, /* alias for ISO-8859-10 */
|
|
|
|
{
|
|
|
|
"latin7", PG_LATIN7
|
|
|
|
}, /* alias for ISO-8859-13 */
|
|
|
|
{
|
|
|
|
"latin8", PG_LATIN8
|
|
|
|
}, /* alias for ISO-8859-14 */
|
|
|
|
{
|
|
|
|
"latin9", PG_LATIN9
|
|
|
|
}, /* alias for ISO-8859-15 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"mskanji", PG_SJIS
|
|
|
|
}, /* alias for Shift_JIS */
|
|
|
|
{
|
|
|
|
"muleinternal", PG_MULE_INTERNAL
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"shiftjis", PG_SJIS
|
|
|
|
}, /* Shift_JIS; JIS X 0202-1991 */
|
2007-03-25 13:56:04 +02:00
|
|
|
|
|
|
|
{
|
|
|
|
"shiftjis2004", PG_SHIFT_JIS_2004
|
|
|
|
}, /* SHIFT-JIS-2004; Shift JIS for Japanese,
|
|
|
|
* standard JIS X 0213 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"sjis", PG_SJIS
|
|
|
|
}, /* alias for Shift_JIS */
|
|
|
|
{
|
|
|
|
"sqlascii", PG_SQL_ASCII
|
|
|
|
},
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"tcvn", PG_WIN1258
|
|
|
|
}, /* alias for WIN1258 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"tcvn5712", PG_WIN1258
|
|
|
|
}, /* alias for WIN1258 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"uhc", PG_UHC
|
|
|
|
}, /* UHC; Korean Windows CodePage 949 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"unicode", PG_UTF8
|
2005-03-07 05:30:55 +01:00
|
|
|
}, /* alias for UTF8 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"utf8", PG_UTF8
|
2005-03-07 05:30:55 +01:00
|
|
|
}, /* alias for UTF8 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"vscii", PG_WIN1258
|
|
|
|
}, /* alias for WIN1258 */
|
2001-10-11 16:20:35 +02:00
|
|
|
{
|
|
|
|
"win", PG_WIN1251
|
|
|
|
}, /* _dirty_ alias for windows-1251 (backward
|
|
|
|
* compatibility) */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"win1250", PG_WIN1250
|
|
|
|
}, /* alias for Windows-1250 */
|
|
|
|
{
|
|
|
|
"win1251", PG_WIN1251
|
|
|
|
}, /* alias for Windows-1251 */
|
2005-03-14 19:31:25 +01:00
|
|
|
{
|
|
|
|
"win1252", PG_WIN1252
|
|
|
|
}, /* alias for Windows-1252 */
|
2006-02-18 17:15:23 +01:00
|
|
|
{
|
|
|
|
"win1253", PG_WIN1253
|
|
|
|
}, /* alias for Windows-1253 */
|
|
|
|
{
|
|
|
|
"win1254", PG_WIN1254
|
|
|
|
}, /* alias for Windows-1254 */
|
|
|
|
{
|
|
|
|
"win1255", PG_WIN1255
|
|
|
|
}, /* alias for Windows-1255 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"win1256", PG_WIN1256
|
|
|
|
}, /* alias for Windows-1256 */
|
2006-02-18 17:15:23 +01:00
|
|
|
{
|
|
|
|
"win1257", PG_WIN1257
|
|
|
|
}, /* alias for Windows-1257 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"win1258", PG_WIN1258
|
2002-03-05 06:52:50 +01:00
|
|
|
}, /* alias for Windows-1258 */
|
2005-03-07 05:30:55 +01:00
|
|
|
{
|
|
|
|
"win866", PG_WIN866
|
|
|
|
}, /* IBM866 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"win874", PG_WIN874
|
|
|
|
}, /* alias for Windows-874 */
|
|
|
|
{
|
|
|
|
"win932", PG_SJIS
|
|
|
|
}, /* alias for Shift_JIS */
|
|
|
|
{
|
|
|
|
"win936", PG_GBK
|
|
|
|
}, /* alias for GBK */
|
|
|
|
{
|
|
|
|
"win949", PG_UHC
|
|
|
|
}, /* alias for UHC */
|
|
|
|
{
|
|
|
|
"win950", PG_BIG5
|
|
|
|
}, /* alias for BIG5 */
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
|
|
|
"windows1250", PG_WIN1250
|
|
|
|
}, /* Windows-1251; Microsoft */
|
|
|
|
{
|
|
|
|
"windows1251", PG_WIN1251
|
|
|
|
}, /* Windows-1251; Microsoft */
|
2005-03-14 19:31:25 +01:00
|
|
|
{
|
|
|
|
"windows1252", PG_WIN1252
|
|
|
|
}, /* Windows-1252; Microsoft */
|
2006-02-18 17:15:23 +01:00
|
|
|
{
|
|
|
|
"windows1253", PG_WIN1253
|
|
|
|
}, /* Windows-1253; Microsoft */
|
|
|
|
{
|
|
|
|
"windows1254", PG_WIN1254
|
|
|
|
}, /* Windows-1254; Microsoft */
|
|
|
|
{
|
|
|
|
"windows1255", PG_WIN1255
|
|
|
|
}, /* Windows-1255; Microsoft */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"windows1256", PG_WIN1256
|
|
|
|
}, /* Windows-1256; Microsoft */
|
2006-02-18 17:15:23 +01:00
|
|
|
{
|
|
|
|
"windows1257", PG_WIN1257
|
|
|
|
}, /* Windows-1257; Microsoft */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
2005-03-07 05:30:55 +01:00
|
|
|
"windows1258", PG_WIN1258
|
2002-03-05 06:52:50 +01:00
|
|
|
}, /* Windows-1258; Microsoft */
|
2005-03-07 05:30:55 +01:00
|
|
|
{
|
|
|
|
"windows866", PG_WIN866
|
|
|
|
}, /* IBM866 */
|
2002-03-05 06:52:50 +01:00
|
|
|
{
|
|
|
|
"windows874", PG_WIN874
|
|
|
|
}, /* Windows-874; Microsoft */
|
|
|
|
{
|
|
|
|
"windows932", PG_SJIS
|
|
|
|
}, /* alias for Shift_JIS */
|
|
|
|
{
|
|
|
|
"windows936", PG_GBK
|
|
|
|
}, /* alias for GBK */
|
|
|
|
{
|
|
|
|
"windows949", PG_UHC
|
|
|
|
}, /* alias for UHC */
|
|
|
|
{
|
|
|
|
"windows950", PG_BIG5
|
2014-01-18 22:04:11 +01:00
|
|
|
} /* alias for BIG5 */
|
2001-09-07 05:32:11 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/* ----------
|
2001-10-16 12:09:17 +02:00
|
|
|
* These are "official" encoding names.
|
2007-10-16 00:46:27 +02:00
|
|
|
* XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
|
2001-09-07 05:32:11 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2009-10-17 02:24:51 +02:00
|
|
|
#ifndef WIN32
|
|
|
|
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
|
|
|
|
#else
|
|
|
|
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
|
|
|
|
#endif
|
2020-01-17 00:08:21 +01:00
|
|
|
|
2014-01-18 22:04:11 +01:00
|
|
|
const pg_enc2name pg_enc2name_tbl[] =
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
2009-10-17 02:24:51 +02:00
|
|
|
DEF_ENC2NAME(SQL_ASCII, 0),
|
|
|
|
DEF_ENC2NAME(EUC_JP, 20932),
|
|
|
|
DEF_ENC2NAME(EUC_CN, 20936),
|
|
|
|
DEF_ENC2NAME(EUC_KR, 51949),
|
|
|
|
DEF_ENC2NAME(EUC_TW, 0),
|
|
|
|
DEF_ENC2NAME(EUC_JIS_2004, 20932),
|
|
|
|
DEF_ENC2NAME(UTF8, 65001),
|
|
|
|
DEF_ENC2NAME(MULE_INTERNAL, 0),
|
|
|
|
DEF_ENC2NAME(LATIN1, 28591),
|
|
|
|
DEF_ENC2NAME(LATIN2, 28592),
|
|
|
|
DEF_ENC2NAME(LATIN3, 28593),
|
|
|
|
DEF_ENC2NAME(LATIN4, 28594),
|
|
|
|
DEF_ENC2NAME(LATIN5, 28599),
|
|
|
|
DEF_ENC2NAME(LATIN6, 0),
|
|
|
|
DEF_ENC2NAME(LATIN7, 0),
|
|
|
|
DEF_ENC2NAME(LATIN8, 0),
|
|
|
|
DEF_ENC2NAME(LATIN9, 28605),
|
|
|
|
DEF_ENC2NAME(LATIN10, 0),
|
|
|
|
DEF_ENC2NAME(WIN1256, 1256),
|
|
|
|
DEF_ENC2NAME(WIN1258, 1258),
|
|
|
|
DEF_ENC2NAME(WIN866, 866),
|
|
|
|
DEF_ENC2NAME(WIN874, 874),
|
|
|
|
DEF_ENC2NAME(KOI8R, 20866),
|
|
|
|
DEF_ENC2NAME(WIN1251, 1251),
|
|
|
|
DEF_ENC2NAME(WIN1252, 1252),
|
|
|
|
DEF_ENC2NAME(ISO_8859_5, 28595),
|
|
|
|
DEF_ENC2NAME(ISO_8859_6, 28596),
|
|
|
|
DEF_ENC2NAME(ISO_8859_7, 28597),
|
|
|
|
DEF_ENC2NAME(ISO_8859_8, 28598),
|
|
|
|
DEF_ENC2NAME(WIN1250, 1250),
|
|
|
|
DEF_ENC2NAME(WIN1253, 1253),
|
|
|
|
DEF_ENC2NAME(WIN1254, 1254),
|
|
|
|
DEF_ENC2NAME(WIN1255, 1255),
|
|
|
|
DEF_ENC2NAME(WIN1257, 1257),
|
|
|
|
DEF_ENC2NAME(KOI8U, 21866),
|
|
|
|
DEF_ENC2NAME(SJIS, 932),
|
|
|
|
DEF_ENC2NAME(BIG5, 950),
|
|
|
|
DEF_ENC2NAME(GBK, 936),
|
2015-08-15 02:23:13 +02:00
|
|
|
DEF_ENC2NAME(UHC, 949),
|
2009-10-17 02:24:51 +02:00
|
|
|
DEF_ENC2NAME(GB18030, 54936),
|
|
|
|
DEF_ENC2NAME(JOHAB, 0),
|
|
|
|
DEF_ENC2NAME(SHIFT_JIS_2004, 932)
|
2001-09-07 05:32:11 +02:00
|
|
|
};
|
|
|
|
|
2009-04-24 10:43:51 +02:00
|
|
|
/* ----------
|
|
|
|
* These are encoding names for gettext.
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
*
|
|
|
|
* This covers all encodings except MULE_INTERNAL, which is alien to gettext.
|
2009-04-24 10:43:51 +02:00
|
|
|
* ----------
|
|
|
|
*/
|
2014-01-18 22:04:11 +01:00
|
|
|
const pg_enc2gettext pg_enc2gettext_tbl[] =
|
2009-04-24 10:43:51 +02:00
|
|
|
{
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
{PG_SQL_ASCII, "US-ASCII"},
|
2009-04-24 10:43:51 +02:00
|
|
|
{PG_UTF8, "UTF-8"},
|
|
|
|
{PG_LATIN1, "LATIN1"},
|
|
|
|
{PG_LATIN2, "LATIN2"},
|
|
|
|
{PG_LATIN3, "LATIN3"},
|
|
|
|
{PG_LATIN4, "LATIN4"},
|
|
|
|
{PG_ISO_8859_5, "ISO-8859-5"},
|
|
|
|
{PG_ISO_8859_6, "ISO_8859-6"},
|
|
|
|
{PG_ISO_8859_7, "ISO-8859-7"},
|
|
|
|
{PG_ISO_8859_8, "ISO-8859-8"},
|
|
|
|
{PG_LATIN5, "LATIN5"},
|
|
|
|
{PG_LATIN6, "LATIN6"},
|
|
|
|
{PG_LATIN7, "LATIN7"},
|
|
|
|
{PG_LATIN8, "LATIN8"},
|
|
|
|
{PG_LATIN9, "LATIN-9"},
|
|
|
|
{PG_LATIN10, "LATIN10"},
|
|
|
|
{PG_KOI8R, "KOI8-R"},
|
|
|
|
{PG_KOI8U, "KOI8-U"},
|
|
|
|
{PG_WIN1250, "CP1250"},
|
|
|
|
{PG_WIN1251, "CP1251"},
|
|
|
|
{PG_WIN1252, "CP1252"},
|
|
|
|
{PG_WIN1253, "CP1253"},
|
|
|
|
{PG_WIN1254, "CP1254"},
|
|
|
|
{PG_WIN1255, "CP1255"},
|
|
|
|
{PG_WIN1256, "CP1256"},
|
|
|
|
{PG_WIN1257, "CP1257"},
|
|
|
|
{PG_WIN1258, "CP1258"},
|
|
|
|
{PG_WIN866, "CP866"},
|
|
|
|
{PG_WIN874, "CP874"},
|
|
|
|
{PG_EUC_CN, "EUC-CN"},
|
|
|
|
{PG_EUC_JP, "EUC-JP"},
|
|
|
|
{PG_EUC_KR, "EUC-KR"},
|
|
|
|
{PG_EUC_TW, "EUC-TW"},
|
|
|
|
{PG_EUC_JIS_2004, "EUC-JP"},
|
Renovate display of non-ASCII messages on Windows.
GNU gettext selects a default encoding for the messages it emits in a
platform-specific manner; it uses the Windows ANSI code page on Windows
and follows LC_CTYPE on other platforms. This is inconvenient for
PostgreSQL server processes, so realize consistent cross-platform
behavior by calling bind_textdomain_codeset() on Windows each time we
permanently change LC_CTYPE. This primarily affects SQL_ASCII databases
and processes like the postmaster that do not attach to a database,
making their behavior consistent with PostgreSQL on non-Windows
platforms. Messages from SQL_ASCII databases use the encoding implied
by the database LC_CTYPE, and messages from non-database processes use
LC_CTYPE from the postmaster system environment. PlatformEncoding
becomes unused, so remove it.
Make write_console() prefer WriteConsoleW() to write() regardless of the
encodings in use. In this situation, write() will invariably mishandle
non-ASCII characters.
elog.c has assumed that messages conform to the database encoding.
While usually true, this does not hold for SQL_ASCII and MULE_INTERNAL.
Introduce MessageEncoding to track the actual encoding of message text.
The present consumers are Windows-specific code for converting messages
to UTF16 for use in system interfaces. This fixes the appearance in
Windows event logs and consoles of translated messages from SQL_ASCII
processes like the postmaster. Note that SQL_ASCII inherently disclaims
a strong notion of encoding, so non-ASCII byte sequences interpolated
into messages by %s may yet yield a nonsensical message. MULE_INTERNAL
has similar problems at present, albeit for a different reason: its lack
of libiconv support or a conversion to UTF8.
Consequently, one need no longer restart Windows with a different
Windows ANSI code page to broadly test backend logging under a given
language. Changing the user's locale ("Format") is enough. Several
accounts can simultaneously run postmasters under different locales, all
correctly logging localized messages to Windows event logs and consoles.
Alexander Law and Noah Misch
2013-06-26 17:17:33 +02:00
|
|
|
{PG_SJIS, "SHIFT-JIS"},
|
|
|
|
{PG_BIG5, "BIG5"},
|
|
|
|
{PG_GBK, "GBK"},
|
|
|
|
{PG_UHC, "UHC"},
|
|
|
|
{PG_GB18030, "GB18030"},
|
|
|
|
{PG_JOHAB, "JOHAB"},
|
|
|
|
{PG_SHIFT_JIS_2004, "SHIFT_JISX0213"},
|
2009-04-24 10:43:51 +02:00
|
|
|
{0, NULL}
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2017-03-23 20:25:34 +01:00
|
|
|
/*
|
2020-01-17 00:08:21 +01:00
|
|
|
* Table of encoding names for ICU (currently covers backend encodings only)
|
2017-03-23 20:25:34 +01:00
|
|
|
*
|
|
|
|
* Reference: <https://ssl.icu-project.org/icu-bin/convexp>
|
|
|
|
*
|
|
|
|
* NULL entries are not supported by ICU, or their mapping is unclear.
|
|
|
|
*/
|
|
|
|
static const char *const pg_enc2icu_tbl[] =
|
|
|
|
{
|
|
|
|
NULL, /* PG_SQL_ASCII */
|
|
|
|
"EUC-JP", /* PG_EUC_JP */
|
|
|
|
"EUC-CN", /* PG_EUC_CN */
|
|
|
|
"EUC-KR", /* PG_EUC_KR */
|
|
|
|
"EUC-TW", /* PG_EUC_TW */
|
|
|
|
NULL, /* PG_EUC_JIS_2004 */
|
|
|
|
"UTF-8", /* PG_UTF8 */
|
|
|
|
NULL, /* PG_MULE_INTERNAL */
|
|
|
|
"ISO-8859-1", /* PG_LATIN1 */
|
|
|
|
"ISO-8859-2", /* PG_LATIN2 */
|
|
|
|
"ISO-8859-3", /* PG_LATIN3 */
|
|
|
|
"ISO-8859-4", /* PG_LATIN4 */
|
|
|
|
"ISO-8859-9", /* PG_LATIN5 */
|
|
|
|
"ISO-8859-10", /* PG_LATIN6 */
|
|
|
|
"ISO-8859-13", /* PG_LATIN7 */
|
|
|
|
"ISO-8859-14", /* PG_LATIN8 */
|
|
|
|
"ISO-8859-15", /* PG_LATIN9 */
|
|
|
|
NULL, /* PG_LATIN10 */
|
|
|
|
"CP1256", /* PG_WIN1256 */
|
|
|
|
"CP1258", /* PG_WIN1258 */
|
|
|
|
"CP866", /* PG_WIN866 */
|
|
|
|
NULL, /* PG_WIN874 */
|
|
|
|
"KOI8-R", /* PG_KOI8R */
|
|
|
|
"CP1251", /* PG_WIN1251 */
|
|
|
|
"CP1252", /* PG_WIN1252 */
|
|
|
|
"ISO-8859-5", /* PG_ISO_8859_5 */
|
|
|
|
"ISO-8859-6", /* PG_ISO_8859_6 */
|
|
|
|
"ISO-8859-7", /* PG_ISO_8859_7 */
|
|
|
|
"ISO-8859-8", /* PG_ISO_8859_8 */
|
|
|
|
"CP1250", /* PG_WIN1250 */
|
|
|
|
"CP1253", /* PG_WIN1253 */
|
|
|
|
"CP1254", /* PG_WIN1254 */
|
|
|
|
"CP1255", /* PG_WIN1255 */
|
|
|
|
"CP1257", /* PG_WIN1257 */
|
|
|
|
"KOI8-U", /* PG_KOI8U */
|
|
|
|
};
|
|
|
|
|
2020-01-17 00:08:21 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Is this encoding supported by ICU?
|
|
|
|
*/
|
2017-03-23 20:25:34 +01:00
|
|
|
bool
|
|
|
|
is_encoding_supported_by_icu(int encoding)
|
|
|
|
{
|
2020-01-17 00:08:21 +01:00
|
|
|
if (!PG_VALID_BE_ENCODING(encoding))
|
|
|
|
return false;
|
2017-03-23 20:25:34 +01:00
|
|
|
return (pg_enc2icu_tbl[encoding] != NULL);
|
|
|
|
}
|
|
|
|
|
2020-01-17 00:08:21 +01:00
|
|
|
/*
|
|
|
|
* Returns ICU's name for encoding, or NULL if not supported
|
|
|
|
*/
|
2017-03-23 20:25:34 +01:00
|
|
|
const char *
|
|
|
|
get_encoding_name_for_icu(int encoding)
|
|
|
|
{
|
|
|
|
StaticAssertStmt(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
|
|
|
|
"pg_enc2icu_tbl incomplete");
|
|
|
|
|
2020-01-17 00:08:21 +01:00
|
|
|
if (!PG_VALID_BE_ENCODING(encoding))
|
|
|
|
return NULL;
|
|
|
|
return pg_enc2icu_tbl[encoding];
|
2017-03-23 20:25:34 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2001-09-07 05:32:11 +02:00
|
|
|
/* ----------
 * Encoding checks, for error returns -1 else encoding id
 * ----------
 */

/*
 * Look up an encoding by name and accept it only if it passes
 * PG_VALID_FE_ENCODING.
 *
 * Returns the encoding ID, or -1 if the name is not recognized or the
 * encoding fails that check.
 */
int
pg_valid_client_encoding(const char *name)
{
	int			enc;

	if ((enc = pg_char_to_encoding(name)) < 0)
		return -1;

	if (!PG_VALID_FE_ENCODING(enc))
		return -1;

	return enc;
}
|
|
|
|
|
|
|
|
/*
 * Look up an encoding by name and accept it only if it passes
 * PG_VALID_BE_ENCODING.
 *
 * Returns the encoding ID, or -1 if the name is not recognized or the
 * encoding fails that check.
 */
int
pg_valid_server_encoding(const char *name)
{
	int			enc;

	if ((enc = pg_char_to_encoding(name)) < 0)
		return -1;

	if (!PG_VALID_BE_ENCODING(enc))
		return -1;

	return enc;
}
|
|
|
|
|
2007-10-13 22:18:42 +02:00
|
|
|
/*
 * Check an encoding ID (rather than a name) against PG_VALID_BE_ENCODING.
 *
 * Returns the value of that macro for the given ID.
 */
int
pg_valid_server_encoding_id(int encoding)
{
	return PG_VALID_BE_ENCODING(encoding);
}
|
|
|
|
|
2020-01-17 00:08:21 +01:00
|
|
|
/*
 * Remove irrelevant chars from encoding name, store at *newkey
 *
 * Only characters for which isalnum() is true are kept, and the letters
 * 'A'..'Z' are folded to lower case.  The result is NUL-terminated.
 *
 * (Caller's responsibility to provide a large enough buffer; the output is
 * never longer than the input, so strlen(key) + 1 bytes always suffice.)
 *
 * Returns newkey.
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
	const char *p;
	char	   *np;

	for (p = key, np = newkey; *p != '\0'; p++)
	{
		/* cast: <ctype.h> functions require an unsigned char value */
		if (isalnum((unsigned char) *p))
		{
			/* fold case by hand so the result does not depend on locale */
			if (*p >= 'A' && *p <= 'Z')
				*np++ = *p + 'a' - 'A';
			else
				*np++ = *p;
		}
	}
	*np = '\0';
	return newkey;
}
|
|
|
|
|
2020-01-17 00:08:21 +01:00
|
|
|
/*
|
2001-09-07 05:32:11 +02:00
|
|
|
* Search encoding by encoding name
|
2014-01-18 22:04:11 +01:00
|
|
|
*
|
2020-01-17 00:08:21 +01:00
|
|
|
* Returns encoding ID, or -1 if not recognized
|
2001-09-07 05:32:11 +02:00
|
|
|
*/
|
2014-01-18 22:04:11 +01:00
|
|
|
int
|
|
|
|
pg_char_to_encoding(const char *name)
|
2001-09-07 05:32:11 +02:00
|
|
|
{
|
2014-01-18 22:04:11 +01:00
|
|
|
unsigned int nel = lengthof(pg_encname_tbl);
|
|
|
|
const pg_encname *base = pg_encname_tbl,
|
2001-09-07 05:32:11 +02:00
|
|
|
*last = base + nel - 1,
|
|
|
|
*position;
|
|
|
|
int result;
|
|
|
|
char buff[NAMEDATALEN],
|
|
|
|
*key;
|
|
|
|
|
|
|
|
if (name == NULL || *name == '\0')
|
2014-01-18 22:04:11 +01:00
|
|
|
return -1;
|
2001-09-07 05:32:11 +02:00
|
|
|
|
2006-02-12 23:32:43 +01:00
|
|
|
if (strlen(name) >= NAMEDATALEN)
|
2020-01-17 00:08:21 +01:00
|
|
|
return -1; /* it's certainly not in the table */
|
|
|
|
|
2007-04-16 20:50:49 +02:00
|
|
|
key = clean_encoding_name(name, buff);
|
2001-09-07 05:32:11 +02:00
|
|
|
|
|
|
|
while (last >= base)
|
2001-09-07 17:01:45 +02:00
|
|
|
{
|
2001-09-07 05:32:11 +02:00
|
|
|
position = base + ((last - base) >> 1);
|
|
|
|
result = key[0] - position->name[0];
|
2001-09-07 17:01:45 +02:00
|
|
|
|
2001-09-07 05:32:11 +02:00
|
|
|
if (result == 0)
|
|
|
|
{
|
|
|
|
result = strcmp(key, position->name);
|
|
|
|
if (result == 0)
|
2014-01-18 22:04:11 +01:00
|
|
|
return position->encoding;
|
2001-09-07 05:32:11 +02:00
|
|
|
}
|
|
|
|
if (result < 0)
|
|
|
|
last = position - 1;
|
|
|
|
else
|
|
|
|
base = position + 1;
|
|
|
|
}
|
2014-01-18 22:04:11 +01:00
|
|
|
return -1;
|
2001-09-07 05:32:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *
|
|
|
|
pg_encoding_to_char(int encoding)
|
|
|
|
{
|
|
|
|
if (PG_VALID_ENCODING(encoding))
|
|
|
|
{
|
2014-01-18 22:04:11 +01:00
|
|
|
const pg_enc2name *p = &pg_enc2name_tbl[encoding];
|
2001-10-25 07:50:21 +02:00
|
|
|
|
2001-09-07 05:32:11 +02:00
|
|
|
Assert(encoding == p->encoding);
|
|
|
|
return p->name;
|
|
|
|
}
|
|
|
|
return "";
|
|
|
|
}
|