postgresql/src/backend/utils/mb/encnames.c

588 lines
10 KiB
C
Raw Normal View History

2001-09-07 05:32:11 +02:00
/*
* Encoding names and routines for working with them. Everything
* in this file is shared between FE and BE.
*
2007-11-15 22:14:46 +01:00
* $PostgreSQL: pgsql/src/backend/utils/mb/encnames.c,v 1.37 2007/11/15 21:14:40 momjian Exp $
2001-09-07 05:32:11 +02:00
*/
#ifdef FRONTEND
#include "postgres_fe.h"
#define Assert(condition)
#else
#include "postgres.h"
#include "utils/builtins.h"
#endif
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
#include <ctype.h>
2001-09-07 05:32:11 +02:00
#include <unistd.h>
#include "mb/pg_wchar.h"
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
2001-09-07 05:32:11 +02:00
/* ----------
* All encoding names, sorted: *** A L P H A B E T I C ***
2001-09-07 05:32:11 +02:00
*
* All names must be without irrelevant chars, search routines use
2001-09-07 05:32:11 +02:00
* isalnum() chars only. It means ISO-8859-1, iso_8859-1 and Iso8859_1
* are always converted to 'iso88591'. All must be lower case.
*
* The table doesn't contain 'cs' aliases (like csISOLatin1). It's needed?
2001-09-07 05:32:11 +02:00
*
* Karel Zak, Aug 2001
* ----------
*/
pg_encname pg_encname_tbl[] =
2001-09-07 05:32:11 +02:00
{
{
"abc", PG_WIN1258
}, /* alias for WIN1258 */
{
"alt", PG_WIN866
}, /* IBM866 */
{
"big5", PG_BIG5
}, /* Big5; Chinese for Taiwan multibyte set */
{
"euccn", PG_EUC_CN
2005-10-15 04:49:52 +02:00
}, /* EUC-CN; Extended Unix Code for simplified
* Chinese */
{
"eucjis2004", PG_EUC_JIS_2004
2007-11-15 22:14:46 +01:00
}, /* EUC-JIS-2004; Extended UNIX Code fixed
* Width for Japanese, standard JIS X 0213 */
{
"eucjp", PG_EUC_JP
2005-10-15 04:49:52 +02:00
}, /* EUC-JP; Extended UNIX Code fixed Width for
* Japanese, standard OSF */
{
"euckr", PG_EUC_KR
2005-10-15 04:49:52 +02:00
}, /* EUC-KR; Extended Unix Code for Korean , KS
* X 1001 standard */
{
"euctw", PG_EUC_TW
}, /* EUC-TW; Extended Unix Code for
2002-09-04 22:31:48 +02:00
*
* traditional Chinese */
{
"gb18030", PG_GB18030
}, /* GB18030;GB18030 */
{
"gbk", PG_GBK
}, /* GBK; Chinese Windows CodePage 936
* simplified Chinese */
{
"iso88591", PG_LATIN1
}, /* ISO-8859-1; RFC1345,KXS2 */
{
"iso885910", PG_LATIN6
}, /* ISO-8859-10; RFC1345,KXS2 */
{
"iso885913", PG_LATIN7
}, /* ISO-8859-13; RFC1345,KXS2 */
{
"iso885914", PG_LATIN8
}, /* ISO-8859-14; RFC1345,KXS2 */
{
"iso885915", PG_LATIN9
}, /* ISO-8859-15; RFC1345,KXS2 */
{
"iso885916", PG_LATIN10
}, /* ISO-8859-16; RFC1345,KXS2 */
{
"iso88592", PG_LATIN2
}, /* ISO-8859-2; RFC1345,KXS2 */
{
"iso88593", PG_LATIN3
}, /* ISO-8859-3; RFC1345,KXS2 */
{
"iso88594", PG_LATIN4
}, /* ISO-8859-4; RFC1345,KXS2 */
{
"iso88595", PG_ISO_8859_5
}, /* ISO-8859-5; RFC1345,KXS2 */
{
"iso88596", PG_ISO_8859_6
}, /* ISO-8859-6; RFC1345,KXS2 */
{
"iso88597", PG_ISO_8859_7
}, /* ISO-8859-7; RFC1345,KXS2 */
{
"iso88598", PG_ISO_8859_8
}, /* ISO-8859-8; RFC1345,KXS2 */
{
"iso88599", PG_LATIN5
}, /* ISO-8859-9; RFC1345,KXS2 */
{
"johab", PG_JOHAB
2005-10-15 04:49:52 +02:00
}, /* JOHAB; Extended Unix Code for simplified
* Chinese */
{
"koi8", PG_KOI8R
}, /* _dirty_ alias for KOI8-R (backward
* compatibility) */
{
"koi8r", PG_KOI8R
}, /* KOI8-R; RFC1489 */
{
"latin1", PG_LATIN1
}, /* alias for ISO-8859-1 */
{
"latin10", PG_LATIN10
}, /* alias for ISO-8859-16 */
{
"latin2", PG_LATIN2
}, /* alias for ISO-8859-2 */
{
"latin3", PG_LATIN3
}, /* alias for ISO-8859-3 */
{
"latin4", PG_LATIN4
}, /* alias for ISO-8859-4 */
{
"latin5", PG_LATIN5
}, /* alias for ISO-8859-9 */
{
"latin6", PG_LATIN6
}, /* alias for ISO-8859-10 */
{
"latin7", PG_LATIN7
}, /* alias for ISO-8859-13 */
{
"latin8", PG_LATIN8
}, /* alias for ISO-8859-14 */
{
"latin9", PG_LATIN9
}, /* alias for ISO-8859-15 */
{
"mskanji", PG_SJIS
}, /* alias for Shift_JIS */
{
"muleinternal", PG_MULE_INTERNAL
},
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
{
"shiftjis2004", PG_SHIFT_JIS_2004
2007-11-15 22:14:46 +01:00
}, /* SHIFT-JIS-2004; Shift JIS for Japanese,
* standard JIS X 0213 */
{
"sjis", PG_SJIS
}, /* alias for Shift_JIS */
{
"sqlascii", PG_SQL_ASCII
},
{
"tcvn", PG_WIN1258
}, /* alias for WIN1258 */
{
"tcvn5712", PG_WIN1258
}, /* alias for WIN1258 */
{
"uhc", PG_UHC
}, /* UHC; Korean Windows CodePage 949 */
{
"unicode", PG_UTF8
}, /* alias for UTF8 */
{
"utf8", PG_UTF8
}, /* alias for UTF8 */
{
"vscii", PG_WIN1258
}, /* alias for WIN1258 */
{
"win", PG_WIN1251
2005-10-15 04:49:52 +02:00
}, /* _dirty_ alias for windows-1251 (backward
* compatibility) */
{
"win1250", PG_WIN1250
}, /* alias for Windows-1250 */
{
"win1251", PG_WIN1251
}, /* alias for Windows-1251 */
{
"win1252", PG_WIN1252
}, /* alias for Windows-1252 */
{
"win1253", PG_WIN1253
}, /* alias for Windows-1253 */
{
"win1254", PG_WIN1254
}, /* alias for Windows-1254 */
{
"win1255", PG_WIN1255
}, /* alias for Windows-1255 */
{
"win1256", PG_WIN1256
}, /* alias for Windows-1256 */
{
"win1257", PG_WIN1257
}, /* alias for Windows-1257 */
{
"win1258", PG_WIN1258
}, /* alias for Windows-1258 */
{
"win866", PG_WIN866
}, /* IBM866 */
{
"win874", PG_WIN874
}, /* alias for Windows-874 */
{
"win932", PG_SJIS
}, /* alias for Shift_JIS */
{
"win936", PG_GBK
}, /* alias for GBK */
{
"win949", PG_UHC
}, /* alias for UHC */
{
"win950", PG_BIG5
}, /* alias for BIG5 */
{
"windows1250", PG_WIN1250
}, /* Windows-1251; Microsoft */
{
"windows1251", PG_WIN1251
}, /* Windows-1251; Microsoft */
{
"windows1252", PG_WIN1252
}, /* Windows-1252; Microsoft */
{
"windows1253", PG_WIN1253
}, /* Windows-1253; Microsoft */
{
"windows1254", PG_WIN1254
}, /* Windows-1254; Microsoft */
{
"windows1255", PG_WIN1255
}, /* Windows-1255; Microsoft */
{
"windows1256", PG_WIN1256
}, /* Windows-1256; Microsoft */
{
"windows1257", PG_WIN1257
}, /* Windows-1257; Microsoft */
{
"windows1258", PG_WIN1258
}, /* Windows-1258; Microsoft */
{
"windows866", PG_WIN866
}, /* IBM866 */
{
"windows874", PG_WIN874
}, /* Windows-874; Microsoft */
{
"windows932", PG_SJIS
}, /* alias for Shift_JIS */
{
"windows936", PG_GBK
}, /* alias for GBK */
{
"windows949", PG_UHC
}, /* alias for UHC */
{
"windows950", PG_BIG5
}, /* alias for BIG5 */
{
NULL, 0
} /* last */
2001-09-07 05:32:11 +02:00
};
/* Number of searchable entries: the trailing {NULL, 0} end marker is not counted. */
unsigned int pg_encname_tbl_sz =
    (sizeof(pg_encname_tbl) / sizeof(pg_encname_tbl[0])) - 1;
2001-09-07 05:32:11 +02:00
/* ----------
* These are "official" encoding names.
* XXX must be sorted by the same order as enum pg_enc (in mb/pg_wchar.h)
2001-09-07 05:32:11 +02:00
* ----------
*/
pg_enc2name pg_enc2name_tbl[] =
{
{
"SQL_ASCII", PG_SQL_ASCII
},
{
"EUC_JP", PG_EUC_JP
},
{
"EUC_CN", PG_EUC_CN
},
{
"EUC_KR", PG_EUC_KR
},
{
"EUC_TW", PG_EUC_TW
},
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
{
"EUC_JIS_2004", PG_EUC_JIS_2004
},
{
"UTF8", PG_UTF8
},
{
"MULE_INTERNAL", PG_MULE_INTERNAL
},
{
"LATIN1", PG_LATIN1
},
{
"LATIN2", PG_LATIN2
},
{
"LATIN3", PG_LATIN3
},
{
"LATIN4", PG_LATIN4
},
{
"LATIN5", PG_LATIN5
},
{
"LATIN6", PG_LATIN6
},
{
"LATIN7", PG_LATIN7
},
{
"LATIN8", PG_LATIN8
},
{
"LATIN9", PG_LATIN9
},
{
"LATIN10", PG_LATIN10
},
{
"WIN1256", PG_WIN1256
},
{
"WIN1258", PG_WIN1258
},
{
"WIN866", PG_WIN866
},
{
"WIN874", PG_WIN874
},
{
"KOI8", PG_KOI8R
},
{
"WIN1251", PG_WIN1251
},
{
"WIN1252", PG_WIN1252
},
{
"ISO_8859_5", PG_ISO_8859_5
},
{
"ISO_8859_6", PG_ISO_8859_6
},
{
"ISO_8859_7", PG_ISO_8859_7
},
{
"ISO_8859_8", PG_ISO_8859_8
},
2004-09-17 23:59:57 +02:00
{
"WIN1250", PG_WIN1250
},
{
"WIN1253", PG_WIN1253
},
{
"WIN1254", PG_WIN1254
},
{
"WIN1255", PG_WIN1255
},
{
"WIN1257", PG_WIN1257
},
{
"SJIS", PG_SJIS
},
{
"BIG5", PG_BIG5
},
{
"GBK", PG_GBK
},
{
"UHC", PG_UHC
},
{
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
"GB18030", PG_GB18030
},
{
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
"JOHAB", PG_JOHAB
},
{
"SHIFT_JIS_2004", PG_SHIFT_JIS_2004
}
2001-09-07 05:32:11 +02:00
};
/* ----------
 * Encoding checks: return the encoding id, or -1 on error.
 *
 * pg_valid_client_encoding() resolves 'name' with pg_char_to_encoding()
 * and accepts the result only if PG_VALID_FE_ENCODING() approves the id.
 * ----------
 */
int
pg_valid_client_encoding(const char *name)
{
    int         enc = pg_char_to_encoding(name);

    /* unknown name, or a known encoding that fails the FE check */
    if (enc < 0 || !PG_VALID_FE_ENCODING(enc))
        return -1;

    return enc;
}
/*
 * Resolve 'name' with pg_char_to_encoding() and return the encoding id if
 * PG_VALID_BE_ENCODING() accepts it; return -1 for an unknown name or an
 * id that fails that check.
 */
int
pg_valid_server_encoding(const char *name)
{
    int         enc = pg_char_to_encoding(name);

    if (enc < 0 || !PG_VALID_BE_ENCODING(enc))
        return -1;

    return enc;
}
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
/*
 * Apply PG_VALID_BE_ENCODING() to an encoding id and return the macro's
 * result unchanged.
 */
int
pg_valid_server_encoding_id(int encoding)
{
    int         is_valid = PG_VALID_BE_ENCODING(encoding);

    return is_valid;
}
2001-09-07 05:32:11 +02:00
/* ----------
 * Remove irrelevant chars from encoding name
 *
 * Copies 'key' into 'newkey', keeping only characters for which isalnum()
 * is true and folding ASCII 'A'..'Z' to lower case. The output is always
 * NUL-terminated and never longer than the input, so 'newkey' must have
 * room for at least strlen(key) + 1 bytes.
 *
 * Returns 'newkey'.
 * ----------
 */
static char *
clean_encoding_name(const char *key, char *newkey)
{
    const char *src;
    char       *dst = newkey;

    for (src = key; *src != '\0'; src++)
    {
        char        c = *src;

        /* cast: <ctype.h> functions require an unsigned char value */
        if (!isalnum((unsigned char) c))
            continue;

        /* explicit ASCII fold, independent of the current locale */
        if (c >= 'A' && c <= 'Z')
            c += 'a' - 'A';

        *dst++ = c;
    }
    *dst = '\0';

    return newkey;
}
/* ----------
* Search encoding by encoding name
* ----------
*/
pg_encname *
pg_char_to_encname_struct(const char *name)
{
unsigned int nel = pg_encname_tbl_sz;
pg_encname *base = pg_encname_tbl,
*last = base + nel - 1,
*position;
int result;
2001-09-07 05:32:11 +02:00
char buff[NAMEDATALEN],
*key;
2001-09-07 05:32:11 +02:00
if (name == NULL || *name == '\0')
return NULL;
2001-09-07 05:32:11 +02:00
if (strlen(name) >= NAMEDATALEN)
2001-09-07 05:32:11 +02:00
{
#ifdef FRONTEND
fprintf(stderr, "encoding name too long\n");
2001-09-07 05:32:11 +02:00
return NULL;
#else
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("encoding name too long")));
2001-09-07 05:32:11 +02:00
#endif
}
key = clean_encoding_name(name, buff);
2001-09-07 05:32:11 +02:00
while (last >= base)
{
2001-09-07 05:32:11 +02:00
position = base + ((last - base) >> 1);
result = key[0] - position->name[0];
2001-09-07 05:32:11 +02:00
if (result == 0)
{
result = strcmp(key, position->name);
if (result == 0)
return position;
}
if (result < 0)
last = position - 1;
else
base = position + 1;
}
return NULL;
}
/*
* Returns encoding or -1 for error
*/
int
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
pg_char_to_encoding(const char *name)
2001-09-07 05:32:11 +02:00
{
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
pg_encname *p;
2001-09-07 05:32:11 +02:00
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
if (!name)
return -1;
2001-09-07 05:32:11 +02:00
Fix the inadvertent libpq ABI breakage discovered by Martin Pitt: the renumbering of encoding IDs done between 8.2 and 8.3 turns out to break 8.2 initdb and psql if they are run with an 8.3beta1 libpq.so. For the moment we can rearrange the order of enum pg_enc to keep the same number for everything except PG_JOHAB, which isn't a problem since there are no direct references to it in the 8.2 programs anyway. (This does force initdb unfortunately.) Going forward, we want to fix things so that encoding IDs can be changed without an ABI break, and this commit includes the changes needed to allow libpq's encoding IDs to be treated as fully independent of the backend's. The main issue is that libpq clients should not include pg_wchar.h or otherwise assume they know the specific values of libpq's encoding IDs, since they might encounter version skew between pg_wchar.h and the libpq.so they are using. To fix, have libpq officially export functions needed for encoding name<=>ID conversion and validity checking; it was doing this anyway unofficially. It's still the case that we can't renumber backend encoding IDs until the next bump in libpq's major version number, since doing so will break the 8.2-era client programs. However the code is now prepared to avoid this type of problem in future. Note that initdb is no longer a libpq client: we just pull in the two source files we need directly. The patch also fixes a few places that were being sloppy about checking for an unrecognized encoding name.
2007-10-13 22:18:42 +02:00
p = pg_char_to_encname_struct(name);
2001-09-07 05:32:11 +02:00
return p ? p->encoding : -1;
}
#ifndef FRONTEND
Datum
PG_char_to_encoding(PG_FUNCTION_ARGS)
{
Name s = PG_GETARG_NAME(0);
2001-09-07 05:32:11 +02:00
PG_RETURN_INT32(pg_char_to_encoding(NameStr(*s)));
}
#endif
const char *
pg_encoding_to_char(int encoding)
{
if (PG_VALID_ENCODING(encoding))
{
pg_enc2name *p = &pg_enc2name_tbl[encoding];
Assert(encoding == p->encoding);
2001-09-07 05:32:11 +02:00
return p->name;
}
return "";
}
#ifndef FRONTEND
Datum
PG_encoding_to_char(PG_FUNCTION_ARGS)
{
int32 encoding = PG_GETARG_INT32(0);
const char *encoding_name = pg_encoding_to_char(encoding);
2001-09-07 05:32:11 +02:00
return DirectFunctionCall1(namein, CStringGetDatum(encoding_name));
}
2001-09-07 05:32:11 +02:00
#endif