diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index 9aac4e96a9..e2b6ff0c9d 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -2,7 +2,7 @@
  * conversion between client encoding and server internal encoding
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
- * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
+ * $Id: conv.c,v 1.6 1999/03/24 07:02:16 ishii Exp $
  */
 #include
 #include
@@ -588,6 +588,277 @@ mic2ascii(unsigned char *mic, unsigned char *p, int len)
 	*p = '\0';
 }
 
+/*
+ * Cyrillic support
+ * currently supported Cyrillic encodings:
+ *
+ * KOI8-R (this is the charset for the mule internal code
+ *		for Cyrillic)
+ * ISO-8859-5
+ * Microsoft's CP1251(windows-1251)
+ * Alternativny Variant (MS-DOS CP866)
+ */
+
+/* koi2mic: KOI8-R to Mule internal code */
+static void
+koi2mic(unsigned char *l, unsigned char *p, int len)
+{
+	latin2mic(l, p, len, LC_KOI8_R);
+}
+
+/* mic2koi: Mule internal code to KOI8-R */
+static void
+mic2koi(unsigned char *mic, unsigned char *p, int len)
+{
+	mic2latin(mic, p, len, LC_KOI8_R);
+}
+
+/*
+ * latin2mic_with_table: a generic single byte charset encoding
+ * conversion from a local charset to the mule internal code
+ * with an encoding conversion table.
+ * the table is ordered according to the local charset,
+ * starting from 128 (0x80). each entry in the table
+ * holds the corresponding code point for the mule internal code;
+ * a zero entry means the byte cannot be converted and a blank is
+ * stored instead.
+ * conversion stops after len bytes or at the first NUL byte of l,
+ * whichever comes first. the result is always NUL terminated.
+ * since a byte >= 0x80 expands to two bytes, p must have room for
+ * len * 2 + 1 bytes.
+ */
+static void
+latin2mic_with_table(
+	const unsigned char *l,		/* local charset string (source) */
+	unsigned char *p,		/* pointer to store mule internal code
+					   (destination) */
+	int len,			/* length of l */
+	int lc,				/* leading character of p */
+	const unsigned char *tab	/* code conversion table */
+	)
+{
+	unsigned char c1, c2;
+
+	while (len-- > 0 && (c1 = *l++)) {
+		if (c1 < 128) {
+			*p++ = c1;
+		} else {
+			c2 = tab[c1 - 128];
+			if (c2) {
+				*p++ = lc;
+				*p++ = c2;
+			} else {
+				*p++ = ' ';	/* cannot convert */
+			}
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * mic2latin_with_table: a generic single byte charset encoding
+ * conversion from the mule internal code to a local charset
+ * with an encoding conversion table.
+ * the table is ordered according to the second byte of the mule
+ * internal code starting from 128 (0x80).
+ * each entry in the table
+ * holds the corresponding code point for the local code;
+ * a zero entry means the character cannot be converted and a
+ * blank is stored instead.
+ * conversion stops after len bytes or at the first NUL byte of
+ * mic, whichever comes first. the result is always NUL terminated
+ * and is never longer than the input.
+ */
+static void
+mic2latin_with_table(
+	const unsigned char *mic,	/* mule internal code (source) */
+	unsigned char *p,		/* local code (destination) */
+	int len,			/* length of mic */
+	int lc,				/* leading character */
+	const unsigned char *tab	/* code conversion table */
+	)
+{
+	unsigned char c1, c2;
+
+	while (len-- > 0 && (c1 = *mic++)) {
+		if (c1 < 128) {
+			*p++ = c1;
+		} else if (c1 == lc) {
+			/*
+			 * the leading character must be followed by a byte
+			 * >= 0x80. do not consume anything else: the input
+			 * may end here, and a smaller byte would index tab
+			 * out of bounds.
+			 */
+			if (len > 0 && *mic >= 128) {
+				c2 = tab[*mic++ - 128];
+				len--;
+				*p++ = c2 ? c2 : ' ';	/* ' ': cannot convert */
+			} else {
+				*p++ = ' ';	/* truncated or malformed sequence */
+			}
+		} else {
+			*p++ = ' ';	/* bogus character */
+		}
+	}
+	*p = '\0';
+}
+
+/* iso2mic: ISO-8859-5 to Mule internal code */
+static void
+iso2mic(unsigned char *l, unsigned char *p, int len)
+{
+	static const unsigned char iso2koi[] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+	};
+	latin2mic_with_table(l, p, len, LC_KOI8_R, iso2koi);
+}
+
+/* mic2iso: Mule internal code to ISO8859-5 */
+static void
+mic2iso(unsigned char *mic, unsigned char *p, int len)
+{
+	static const unsigned char koi2iso[] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
+		0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,
+		0xdf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xd6, 0xd2,
+		0xec, 0xeb, 0xd7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
+		0xce, 0xb0, 0xb1, 0xc6, 0xb4, 0xb5, 0xc4, 0xb3,
+		0xc5, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
+		0xbf, 0xcf, 0xc0, 0xc1, 0xc2, 0xc3, 0xb6, 0xb2,
+		0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca
+	};
+
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2iso);
+}
+
+/* win2mic: CP1251 to Mule internal code */
+static void
+win2mic(unsigned char *l, unsigned char *p, int len)
+{
+	static const unsigned char win2koi[] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+		0xb3, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0xb7,
+		0x00, 0x00, 0xb6, 0xa6, 0xad, 0x00, 0x00, 0x00,
+		0xa3, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x00, 0xa7,
+		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1
+	};
+	latin2mic_with_table(l, p, len, LC_KOI8_R, win2koi);
+}
+
+/* mic2win: Mule internal code to CP1251 */
+static void
+mic2win(unsigned char *mic, unsigned char *p, int len)
+{
+	static const unsigned char koi2win[] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0xb8, 0xba, 0x00, 0xb3, 0xbf,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0xa8, 0xaa, 0x00, 0xb2, 0xaf,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xa5, 0x00, 0x00,
+		0xfe, 0xe0, 0xe1, 0xf6, 0xe4, 0xe5, 0xf4, 0xe3,
+		0xf5, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
+		0xef, 0xff, 0xf0, 0xf1, 0xf2, 0xf3, 0xe6, 0xe2,
+		0xfc, 0xfb, 0xe7, 0xf8, 0xfd, 0xf9, 0xf7, 0xfa,
+		0xde, 0xc0, 0xc1, 0xd6, 0xc4, 0xc5, 0xd4, 0xc3,
+		0xd5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+		0xcf, 0xdf, 0xd0, 0xd1, 0xd2, 0xd3, 0xc6, 0xc2,
+		0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda
+	};
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win);
+}
+
+/* alt2mic: CP866 to Mule internal code */
+static void
+alt2mic(unsigned char *l, unsigned char *p, int len)
+{
+	static const unsigned char alt2koi[] = {
+		0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+		0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+		0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+		0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+		0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+		0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+		0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
+		0xb3, 0xa3, 0xb4, 0xa4, 0xb7, 0xa7, 0x00, 0x00,
+		0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+	};
+	latin2mic_with_table(l, p, len, LC_KOI8_R, alt2koi);
+}
+
+/* mic2alt: Mule internal code to CP866 */
+static void
+mic2alt(unsigned char *mic, unsigned char *p, int len)
+{
+	static const unsigned char koi2alt[] = {
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0xf1, 0xf3, 0x00, 0xf9, 0xf5,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0xf0, 0xf2, 0x00, 0xf8, 0xf4,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+		0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3,
+		0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
+		0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2,
+		0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
+		0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83,
+		0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
+		0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82,
+		0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
+	};
+	mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2alt);
+}
+
+/*
+ * end of Cyrillic support
+ */
+
 pg_encoding_conv_tbl pg_conv_tbl[] = {
 	{SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii}, /* SQL/ACII */
 	{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp}, /* EUC_JP */
@@ -600,7 +871,10 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
 	{LATIN2, "LATIN2", 0, latin22mic, mic2latin2}, /* ISO 8859 Latin 2 */
 	{LATIN3, "LATIN3", 0, latin32mic, mic2latin3}, /* ISO 8859 Latin 3 */
 	{LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */
-	{LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */
+	{LATIN5, "LATIN5", 0, iso2mic, mic2iso}, /* ISO-8859-5 (Cyrillic) */
+	{KOI8, "KOI8", 0, koi2mic, mic2koi}, /* KOI8-R */
+	{WIN, "WIN", 0, win2mic, mic2win}, /* CP1251 */
+	{ALT, "ALT", 0, alt2mic, mic2alt}, /* CP866 */
 	{SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */
 	{BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */
 	{-1, "", 0, 0, 0} /* end mark */
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 2a1141fbad..78f22c15eb 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multi-byte streams.
  * Tatsuo Ishii
- * $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
+ * $Id: wchar.c,v 1.6 1999/03/24 07:02:17 ishii Exp $
  */
 
 #include "mb/pg_wchar.h"
@@ -416,40 +416,40 @@ pg_big5_mblen(const unsigned char *s)
 }
 
 pg_wchar_tbl pg_wchar_table[] = {
-	{pg_ascii2wchar_with_len, pg_ascii_mblen},
-	{pg_eucjp2wchar_with_len, pg_eucjp_mblen},
-	{pg_euccn2wchar_with_len, pg_euccn_mblen},
-	{pg_euckr2wchar_with_len, pg_euckr_mblen},
-	{pg_euctw2wchar_with_len, pg_euctw_mblen},
-	{pg_utf2wchar_with_len, pg_utf_mblen},
-	{pg_mule2wchar_with_len, pg_mule_mblen},
-	{pg_latin12wchar_with_len, pg_latin1_mblen},
-	{pg_latin12wchar_with_len, pg_latin1_mblen},
-	{pg_latin12wchar_with_len, pg_latin1_mblen},
-	{pg_latin12wchar_with_len, pg_latin1_mblen},
-	{pg_latin12wchar_with_len, pg_latin1_mblen},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, 0},
-	{0, pg_sjis_mblen},
-	{0, pg_big5_mblen}
+	{pg_ascii2wchar_with_len, pg_ascii_mblen}, /* 0 */
+	{pg_eucjp2wchar_with_len, pg_eucjp_mblen}, /* 1 */
+	{pg_euccn2wchar_with_len, pg_euccn_mblen}, /* 2 */
+	{pg_euckr2wchar_with_len, pg_euckr_mblen}, /* 3 */
+	{pg_euctw2wchar_with_len, pg_euctw_mblen}, /* 4 */
+	{pg_utf2wchar_with_len, pg_utf_mblen}, /* 5 */
+	{pg_mule2wchar_with_len, pg_mule_mblen}, /* 6 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 7 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 8 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 9 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 10 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 11 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 12 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 13 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 14 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 15 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 16 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 17 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 18 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 19 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 20 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 21 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 22 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 23 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 24 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 25 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 26 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 27 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 28 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 29 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 30 */
+	{pg_latin12wchar_with_len, pg_latin1_mblen}, /* 31 */
+	{0, pg_sjis_mblen}, /* 32 */
+	{0, pg_big5_mblen} /* 33 */
 };
 
 /* returns the byte length of a word for mule internal code */