postgresql/src/common/encnames.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

598 lines
13 KiB
C
Raw Permalink Normal View History

/*-------------------------------------------------------------------------
 *
 * encnames.c
 *	  Encoding names and routines for working with them.
 *
 * Portions Copyright (c) 2001-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/common/encnames.c
 *
 *-------------------------------------------------------------------------
 */
#include "c.h"
2001-09-07 05:32:11 +02:00
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
#include <ctype.h>
2001-09-07 05:32:11 +02:00
#include <unistd.h>
#include "mb/pg_wchar.h"
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
2001-09-07 05:32:11 +02:00
/* ----------
 * All encoding names, sorted:		 *** A L P H A B E T I C ***
 *
 * All names must be without irrelevant chars, search routines use
 * isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
 * are always converted to 'iso88591'. All must be lower case.
 *
 * The table doesn't contain 'cs' aliases (like csISOLatin1). Are they
 * needed?
 *
 * Karel Zak, Aug 2001
 * ----------
 */
typedef struct pg_encname
{
const char *name;	/* normalized name or alias: lower case, alphanumeric only */
pg_enc encoding;	/* encoding ID that this name maps to */
} pg_encname;
static const pg_encname pg_encname_tbl[] =
2001-09-07 05:32:11 +02:00
{
{
"abc", PG_WIN1258
}, /* alias for WIN1258 */
2001-09-07 05:32:11 +02:00
{
"alt", PG_WIN866
2001-09-07 05:32:11 +02:00
}, /* IBM866 */
{
"big5", PG_BIG5
}, /* Big5; Chinese for Taiwan multibyte set */
{
"euccn", PG_EUC_CN
}, /* EUC-CN; Extended Unix Code for simplified
* Chinese */
{
"eucjis2004", PG_EUC_JIS_2004
}, /* EUC-JIS-2004; Extended UNIX Code fixed
* Width for Japanese, standard JIS X 0213 */
{
"eucjp", PG_EUC_JP
}, /* EUC-JP; Extended UNIX Code fixed Width for
* Japanese, standard OSF */
2001-09-07 05:32:11 +02:00
{
"euckr", PG_EUC_KR
}, /* EUC-KR; Extended Unix Code for Korean , KS
* X 1001 standard */
{
"euctw", PG_EUC_TW
}, /* EUC-TW; Extended Unix Code for
2002-09-04 22:31:48 +02:00
*
* traditional Chinese */
{
"gb18030", PG_GB18030
}, /* GB18030;GB18030 */
{
"gbk", PG_GBK
}, /* GBK; Chinese Windows CodePage 936
* simplified Chinese */
2001-09-07 05:32:11 +02:00
{
"iso88591", PG_LATIN1
}, /* ISO-8859-1; RFC1345,KXS2 */
{
"iso885910", PG_LATIN6
}, /* ISO-8859-10; RFC1345,KXS2 */
{
"iso885913", PG_LATIN7
}, /* ISO-8859-13; RFC1345,KXS2 */
{
"iso885914", PG_LATIN8
}, /* ISO-8859-14; RFC1345,KXS2 */
{
"iso885915", PG_LATIN9
}, /* ISO-8859-15; RFC1345,KXS2 */
{
"iso885916", PG_LATIN10
}, /* ISO-8859-16; RFC1345,KXS2 */
2001-09-07 05:32:11 +02:00
{
"iso88592", PG_LATIN2
}, /* ISO-8859-2; RFC1345,KXS2 */
{
"iso88593", PG_LATIN3
}, /* ISO-8859-3; RFC1345,KXS2 */
{
"iso88594", PG_LATIN4
}, /* ISO-8859-4; RFC1345,KXS2 */
2001-10-11 16:20:35 +02:00
{
"iso88595", PG_ISO_8859_5
}, /* ISO-8859-5; RFC1345,KXS2 */
{
"iso88596", PG_ISO_8859_6
}, /* ISO-8859-6; RFC1345,KXS2 */
{
"iso88597", PG_ISO_8859_7
}, /* ISO-8859-7; RFC1345,KXS2 */
{
"iso88598", PG_ISO_8859_8
}, /* ISO-8859-8; RFC1345,KXS2 */
{
"iso88599", PG_LATIN5
}, /* ISO-8859-9; RFC1345,KXS2 */
{
"johab", PG_JOHAB
}, /* JOHAB; Extended Unix Code for simplified
* Chinese */
2001-10-11 16:20:35 +02:00
{
"koi8", PG_KOI8R
}, /* _dirty_ alias for KOI8-R (backward
* compatibility) */
2001-09-07 05:32:11 +02:00
{
"koi8r", PG_KOI8R
}, /* KOI8-R; RFC1489 */
2009-02-10 20:29:39 +01:00
{
"koi8u", PG_KOI8U
}, /* KOI8-U; RFC2319 */
2001-09-07 05:32:11 +02:00
{
"latin1", PG_LATIN1
}, /* alias for ISO-8859-1 */
{
"latin10", PG_LATIN10
}, /* alias for ISO-8859-16 */
2001-09-07 05:32:11 +02:00
{
"latin2", PG_LATIN2
}, /* alias for ISO-8859-2 */
{
"latin3", PG_LATIN3
}, /* alias for ISO-8859-3 */
{
"latin4", PG_LATIN4
}, /* alias for ISO-8859-4 */
2001-10-11 16:20:35 +02:00
{
"latin5", PG_LATIN5
}, /* alias for ISO-8859-9 */
{
"latin6", PG_LATIN6
}, /* alias for ISO-8859-10 */
{
"latin7", PG_LATIN7
}, /* alias for ISO-8859-13 */
{
"latin8", PG_LATIN8
}, /* alias for ISO-8859-14 */
{
"latin9", PG_LATIN9
}, /* alias for ISO-8859-15 */
2001-09-07 05:32:11 +02:00
{
"mskanji", PG_SJIS
}, /* alias for Shift_JIS */
{
"muleinternal", PG_MULE_INTERNAL
},
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
{
"shiftjis2004", PG_SHIFT_JIS_2004
}, /* SHIFT-JIS-2004; Shift JIS for Japanese,
* standard JIS X 0213 */
2001-09-07 05:32:11 +02:00
{
"sjis", PG_SJIS
}, /* alias for Shift_JIS */
{
"sqlascii", PG_SQL_ASCII
},
{
"tcvn", PG_WIN1258
}, /* alias for WIN1258 */
{
"tcvn5712", PG_WIN1258
}, /* alias for WIN1258 */
{
"uhc", PG_UHC
}, /* UHC; Korean Windows CodePage 949 */
2001-09-07 05:32:11 +02:00
{
"unicode", PG_UTF8
}, /* alias for UTF8 */
2001-09-07 05:32:11 +02:00
{
"utf8", PG_UTF8
}, /* alias for UTF8 */
{
"vscii", PG_WIN1258
}, /* alias for WIN1258 */
2001-10-11 16:20:35 +02:00
{
"win", PG_WIN1251
}, /* _dirty_ alias for windows-1251 (backward
* compatibility) */
2001-09-07 05:32:11 +02:00
{
"win1250", PG_WIN1250
}, /* alias for Windows-1250 */
{
"win1251", PG_WIN1251
}, /* alias for Windows-1251 */
{
"win1252", PG_WIN1252
}, /* alias for Windows-1252 */
{
"win1253", PG_WIN1253
}, /* alias for Windows-1253 */
{
"win1254", PG_WIN1254
}, /* alias for Windows-1254 */
{
"win1255", PG_WIN1255
}, /* alias for Windows-1255 */
{
"win1256", PG_WIN1256
}, /* alias for Windows-1256 */
{
"win1257", PG_WIN1257
}, /* alias for Windows-1257 */
{
"win1258", PG_WIN1258
}, /* alias for Windows-1258 */
{
"win866", PG_WIN866
}, /* IBM866 */
{
"win874", PG_WIN874
}, /* alias for Windows-874 */
{
"win932", PG_SJIS
}, /* alias for Shift_JIS */
{
"win936", PG_GBK
}, /* alias for GBK */
{
"win949", PG_UHC
}, /* alias for UHC */
{
"win950", PG_BIG5
}, /* alias for BIG5 */
2001-09-07 05:32:11 +02:00
{
"windows1250", PG_WIN1250
}, /* Windows-1251; Microsoft */
{
"windows1251", PG_WIN1251
}, /* Windows-1251; Microsoft */
{
"windows1252", PG_WIN1252
}, /* Windows-1252; Microsoft */
{
"windows1253", PG_WIN1253
}, /* Windows-1253; Microsoft */
{
"windows1254", PG_WIN1254
}, /* Windows-1254; Microsoft */
{
"windows1255", PG_WIN1255
}, /* Windows-1255; Microsoft */
{
"windows1256", PG_WIN1256
}, /* Windows-1256; Microsoft */
{
"windows1257", PG_WIN1257
}, /* Windows-1257; Microsoft */
{
"windows1258", PG_WIN1258
}, /* Windows-1258; Microsoft */
{
"windows866", PG_WIN866
}, /* IBM866 */
{
"windows874", PG_WIN874
}, /* Windows-874; Microsoft */
{
"windows932", PG_SJIS
}, /* alias for Shift_JIS */
{
"windows936", PG_GBK
}, /* alias for GBK */
{
"windows949", PG_UHC
}, /* alias for UHC */
{
"windows950", PG_BIG5
} /* alias for BIG5 */
2001-09-07 05:32:11 +02:00
};
/* ----------
* These are "official" encoding names.
2001-09-07 05:32:11 +02:00
* ----------
*/
#ifndef WIN32
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name }
#else
#define DEF_ENC2NAME(name, codepage) { #name, PG_##name, codepage }
#endif
const pg_enc2name pg_enc2name_tbl[] =
2001-09-07 05:32:11 +02:00
{
[PG_SQL_ASCII] = DEF_ENC2NAME(SQL_ASCII, 0),
[PG_EUC_JP] = DEF_ENC2NAME(EUC_JP, 20932),
[PG_EUC_CN] = DEF_ENC2NAME(EUC_CN, 20936),
[PG_EUC_KR] = DEF_ENC2NAME(EUC_KR, 51949),
[PG_EUC_TW] = DEF_ENC2NAME(EUC_TW, 0),
[PG_EUC_JIS_2004] = DEF_ENC2NAME(EUC_JIS_2004, 20932),
[PG_UTF8] = DEF_ENC2NAME(UTF8, 65001),
[PG_MULE_INTERNAL] = DEF_ENC2NAME(MULE_INTERNAL, 0),
[PG_LATIN1] = DEF_ENC2NAME(LATIN1, 28591),
[PG_LATIN2] = DEF_ENC2NAME(LATIN2, 28592),
[PG_LATIN3] = DEF_ENC2NAME(LATIN3, 28593),
[PG_LATIN4] = DEF_ENC2NAME(LATIN4, 28594),
[PG_LATIN5] = DEF_ENC2NAME(LATIN5, 28599),
[PG_LATIN6] = DEF_ENC2NAME(LATIN6, 0),
[PG_LATIN7] = DEF_ENC2NAME(LATIN7, 0),
[PG_LATIN8] = DEF_ENC2NAME(LATIN8, 0),
[PG_LATIN9] = DEF_ENC2NAME(LATIN9, 28605),
[PG_LATIN10] = DEF_ENC2NAME(LATIN10, 0),
[PG_WIN1256] = DEF_ENC2NAME(WIN1256, 1256),
[PG_WIN1258] = DEF_ENC2NAME(WIN1258, 1258),
[PG_WIN866] = DEF_ENC2NAME(WIN866, 866),
[PG_WIN874] = DEF_ENC2NAME(WIN874, 874),
[PG_KOI8R] = DEF_ENC2NAME(KOI8R, 20866),
[PG_WIN1251] = DEF_ENC2NAME(WIN1251, 1251),
[PG_WIN1252] = DEF_ENC2NAME(WIN1252, 1252),
[PG_ISO_8859_5] = DEF_ENC2NAME(ISO_8859_5, 28595),
[PG_ISO_8859_6] = DEF_ENC2NAME(ISO_8859_6, 28596),
[PG_ISO_8859_7] = DEF_ENC2NAME(ISO_8859_7, 28597),
[PG_ISO_8859_8] = DEF_ENC2NAME(ISO_8859_8, 28598),
[PG_WIN1250] = DEF_ENC2NAME(WIN1250, 1250),
[PG_WIN1253] = DEF_ENC2NAME(WIN1253, 1253),
[PG_WIN1254] = DEF_ENC2NAME(WIN1254, 1254),
[PG_WIN1255] = DEF_ENC2NAME(WIN1255, 1255),
[PG_WIN1257] = DEF_ENC2NAME(WIN1257, 1257),
[PG_KOI8U] = DEF_ENC2NAME(KOI8U, 21866),
[PG_SJIS] = DEF_ENC2NAME(SJIS, 932),
[PG_BIG5] = DEF_ENC2NAME(BIG5, 950),
[PG_GBK] = DEF_ENC2NAME(GBK, 936),
[PG_UHC] = DEF_ENC2NAME(UHC, 949),
[PG_GB18030] = DEF_ENC2NAME(GB18030, 54936),
[PG_JOHAB] = DEF_ENC2NAME(JOHAB, 0),
[PG_SHIFT_JIS_2004] = DEF_ENC2NAME(SHIFT_JIS_2004, 932),
2001-09-07 05:32:11 +02:00
};
/* ----------
 * These are encoding names for gettext.
 *
 * This covers all encodings except MULE_INTERNAL, which is alien to gettext.
 * The array is indexed by encoding ID (designated initializers), and the
 * MULE_INTERNAL slot is explicitly NULL.
 * ----------
 */
const char *pg_enc2gettext_tbl[] =
{
[PG_SQL_ASCII] = "US-ASCII",
[PG_UTF8] = "UTF-8",
[PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "LATIN1",
[PG_LATIN2] = "LATIN2",
[PG_LATIN3] = "LATIN3",
[PG_LATIN4] = "LATIN4",
[PG_ISO_8859_5] = "ISO-8859-5",
[PG_ISO_8859_6] = "ISO_8859-6",
[PG_ISO_8859_7] = "ISO-8859-7",
[PG_ISO_8859_8] = "ISO-8859-8",
[PG_LATIN5] = "LATIN5",
[PG_LATIN6] = "LATIN6",
[PG_LATIN7] = "LATIN7",
[PG_LATIN8] = "LATIN8",
[PG_LATIN9] = "LATIN-9",
[PG_LATIN10] = "LATIN10",
[PG_KOI8R] = "KOI8-R",
[PG_KOI8U] = "KOI8-U",
[PG_WIN1250] = "CP1250",
[PG_WIN1251] = "CP1251",
[PG_WIN1252] = "CP1252",
[PG_WIN1253] = "CP1253",
[PG_WIN1254] = "CP1254",
[PG_WIN1255] = "CP1255",
[PG_WIN1256] = "CP1256",
[PG_WIN1257] = "CP1257",
[PG_WIN1258] = "CP1258",
[PG_WIN866] = "CP866",
[PG_WIN874] = "CP874",
[PG_EUC_CN] = "EUC-CN",
[PG_EUC_JP] = "EUC-JP",
[PG_EUC_KR] = "EUC-KR",
[PG_EUC_TW] = "EUC-TW",
[PG_EUC_JIS_2004] = "EUC-JP",
[PG_SJIS] = "SHIFT-JIS",
[PG_BIG5] = "BIG5",
[PG_GBK] = "GBK",
[PG_UHC] = "UHC",
[PG_GB18030] = "GB18030",
[PG_JOHAB] = "JOHAB",
[PG_SHIFT_JIS_2004] = "SHIFT_JISX0213",
};
/*
 * Table of encoding names for ICU (currently covers backend encodings only)
 *
 * Reference: <https://ssl.icu-project.org/icu-bin/convexp>
 *
 * NULL entries are not supported by ICU, or their mapping is unclear.
 *
 * The array is indexed by encoding ID via designated initializers.  The
 * static assertion after the table checks that its length is exactly
 * PG_ENCODING_BE_LAST + 1.
 */
static const char *const pg_enc2icu_tbl[] =
{
[PG_SQL_ASCII] = NULL,
[PG_EUC_JP] = "EUC-JP",
[PG_EUC_CN] = "EUC-CN",
[PG_EUC_KR] = "EUC-KR",
[PG_EUC_TW] = "EUC-TW",
[PG_EUC_JIS_2004] = NULL,
[PG_UTF8] = "UTF-8",
[PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "ISO-8859-1",
[PG_LATIN2] = "ISO-8859-2",
[PG_LATIN3] = "ISO-8859-3",
[PG_LATIN4] = "ISO-8859-4",
[PG_LATIN5] = "ISO-8859-9",
[PG_LATIN6] = "ISO-8859-10",
[PG_LATIN7] = "ISO-8859-13",
[PG_LATIN8] = "ISO-8859-14",
[PG_LATIN9] = "ISO-8859-15",
[PG_LATIN10] = NULL,
[PG_WIN1256] = "CP1256",
[PG_WIN1258] = "CP1258",
[PG_WIN866] = "CP866",
[PG_WIN874] = NULL,
[PG_KOI8R] = "KOI8-R",
[PG_WIN1251] = "CP1251",
[PG_WIN1252] = "CP1252",
[PG_ISO_8859_5] = "ISO-8859-5",
[PG_ISO_8859_6] = "ISO-8859-6",
[PG_ISO_8859_7] = "ISO-8859-7",
[PG_ISO_8859_8] = "ISO-8859-8",
[PG_WIN1250] = "CP1250",
[PG_WIN1253] = "CP1253",
[PG_WIN1254] = "CP1254",
[PG_WIN1255] = "CP1255",
[PG_WIN1257] = "CP1257",
[PG_KOI8U] = "KOI8-U",
};
/* Fail the build if the table length drifts from the backend encoding count */
StaticAssertDecl(lengthof(pg_enc2icu_tbl) == PG_ENCODING_BE_LAST + 1,
"pg_enc2icu_tbl incomplete");
/*
 * Report whether ICU has a usable name for the given encoding.
 *
 * An ID that is not a valid backend encoding, or whose pg_enc2icu_tbl slot
 * is NULL, is reported as unsupported.
 */
bool
is_encoding_supported_by_icu(int encoding)
{
	/* the range check must come first: it guards the table access */
	return PG_VALID_BE_ENCODING(encoding) &&
		pg_enc2icu_tbl[encoding] != NULL;
}
/*
 * Look up the name ICU uses for an encoding.
 *
 * Returns NULL when the ID is not a valid backend encoding or when the
 * pg_enc2icu_tbl slot for it is NULL.
 */
const char *
get_encoding_name_for_icu(int encoding)
{
	return PG_VALID_BE_ENCODING(encoding) ? pg_enc2icu_tbl[encoding] : NULL;
}
2001-09-07 05:32:11 +02:00
/* ----------
 * Encoding checks: return the encoding ID for a name, or -1 on error.
 *
 * pg_valid_client_encoding() accepts a name only if it is recognized by
 * pg_char_to_encoding() and passes PG_VALID_FE_ENCODING().
 * ----------
 */
int
pg_valid_client_encoding(const char *name)
{
	int			enc_id = pg_char_to_encoding(name);

	if (enc_id < 0 || !PG_VALID_FE_ENCODING(enc_id))
		return -1;

	return enc_id;
}
/*
 * Return the encoding ID for a name if it is recognized by
 * pg_char_to_encoding() and passes PG_VALID_BE_ENCODING(); otherwise -1.
 */
int
pg_valid_server_encoding(const char *name)
{
	int			enc_id = pg_char_to_encoding(name);

	if (enc_id < 0 || !PG_VALID_BE_ENCODING(enc_id))
		return -1;

	return enc_id;
}
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
/*
 * Check an encoding ID (rather than a name) against PG_VALID_BE_ENCODING().
 *
 * Returns the value of that check: nonzero if it passes, zero otherwise.
 */
int
pg_valid_server_encoding_id(int encoding)
{
	const int	is_backend_encoding = PG_VALID_BE_ENCODING(encoding);

	return is_backend_encoding;
}
/*
 * Remove irrelevant chars from encoding name, store at *newkey
 *
 * Only characters accepted by isalnum() are copied; ASCII upper-case letters
 * are folded to lower case on the way.  The result is NUL-terminated.
 *
 * key:    NUL-terminated input name
 * newkey: output buffer; it is the caller's responsibility to provide a
 *         large enough one (strlen(key) + 1 bytes always suffices)
 *
 * Returns newkey.
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
	const char *p;
	char	   *np = newkey;

	for (p = key; *p != '\0'; p++)
	{
		/* cast keeps isalnum() defined for bytes with the high bit set */
		if (!isalnum((unsigned char) *p))
			continue;

		/* fold by ASCII range so the result is locale-independent */
		if (*p >= 'A' && *p <= 'Z')
			*np++ = *p + 'a' - 'A';
		else
			*np++ = *p;
	}
	*np = '\0';

	return newkey;
}
/*
2001-09-07 05:32:11 +02:00
* Search encoding by encoding name
*
* Returns encoding ID, or -1 if not recognized
2001-09-07 05:32:11 +02:00
*/
int
pg_char_to_encoding(const char *name)
2001-09-07 05:32:11 +02:00
{
unsigned int nel = lengthof(pg_encname_tbl);
const pg_encname *base = pg_encname_tbl,
2001-09-07 05:32:11 +02:00
*last = base + nel - 1,
*position;
int result;
char buff[NAMEDATALEN],
*key;
if (name == NULL || *name == '\0')
return -1;
2001-09-07 05:32:11 +02:00
if (strlen(name) >= NAMEDATALEN)
return -1; /* it's certainly not in the table */
key = clean_encoding_name(name, buff);
2001-09-07 05:32:11 +02:00
while (last >= base)
{
2001-09-07 05:32:11 +02:00
position = base + ((last - base) >> 1);
result = key[0] - position->name[0];
2001-09-07 05:32:11 +02:00
if (result == 0)
{
result = strcmp(key, position->name);
if (result == 0)
return position->encoding;
2001-09-07 05:32:11 +02:00
}
if (result < 0)
last = position - 1;
else
base = position + 1;
}
return -1;
2001-09-07 05:32:11 +02:00
}
const char *
pg_encoding_to_char(int encoding)
{
if (PG_VALID_ENCODING(encoding))
{
const pg_enc2name *p = &pg_enc2name_tbl[encoding];
2001-09-07 05:32:11 +02:00
Assert(encoding == p->encoding);
return p->name;
}
return "";
}