postgresql/src/interfaces/odbc/multibyte.c

455 lines
8.6 KiB
C

/*--------
* Module : multibyte.c
*
* Description: Additional multibyte-related functions.
*
* Create 2001-03-03 Eiji Tokuya
* New Create 2001-09-16 Eiji Tokuya
*--------
*/
#include "multibyte.h"
#include "connection.h"
#include "pgapifunc.h"
#include <string.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/* Fallback definition of TRUE, used only when no earlier header defined it. */
#ifndef TRUE
#define TRUE 1
#endif
/*
 * Mapping between client-encoding names and the driver's character set
 * codes.  The final OTHER entry is the sentinel that the lookup loops in
 * this file stop on, so it must remain the last element.
 */
pg_CS CS_Table[] =
{
	{ "SQL_ASCII",     SQL_ASCII },
	{ "EUC_JP",        EUC_JP },
	{ "EUC_CN",        EUC_CN },
	{ "EUC_KR",        EUC_KR },
	{ "EUC_TW",        EUC_TW },
	{ "JOHAB",         JOHAB },
	{ "UNICODE",       UTF8 },
	{ "MULE_INTERNAL", MULE_INTERNAL },
	{ "LATIN1",        LATIN1 },
	{ "LATIN2",        LATIN2 },
	{ "LATIN3",        LATIN3 },
	{ "LATIN4",        LATIN4 },
	{ "LATIN5",        LATIN5 },
	{ "LATIN6",        LATIN6 },
	{ "LATIN7",        LATIN7 },
	{ "LATIN8",        LATIN8 },
	{ "LATIN9",        LATIN9 },
	{ "LATIN10",       LATIN10 },
	{ "WIN1256",       WIN1256 },
	{ "TCVN",          TCVN },
	{ "WIN874",        WIN874 },
	{ "KOI8",          KOI8R },
	{ "WIN",           WIN1251 },
	{ "ALT",           ALT },
	{ "ISO_8859_5",    ISO_8859_5 },
	{ "ISO_8859_6",    ISO_8859_6 },
	{ "ISO_8859_7",    ISO_8859_7 },
	{ "ISO_8859_8",    ISO_8859_8 },
	{ "SJIS",          SJIS },
	{ "BIG5",          BIG5 },
	{ "GBK",           GBK },
	{ "UHC",           UHC },
	{ "WIN1250",       WIN1250 },
	{ "GB18030",       GB18030 },
	{ "OTHER",         OTHER }
};
#ifdef NOT_USED
/*
 * pg_ismb
 *
 * Look characterset_code up in the list of multibyte character sets.
 *
 * Returns the matching character set code when characterset_code is one of
 * the listed multibyte encodings, otherwise OTHER.
 *
 * The list is terminated by an OTHER sentinel and the loop stops on either
 * a match or the sentinel.  (The previous condition joined the two tests
 * with "||" and the list had no sentinel, so the scan could never stop
 * inside the array.)
 */
static int
pg_ismb(int characterset_code)
{
	int			i = 0,
				MB_CHARACTERSET[] = {EUC_JP, EUC_CN, EUC_KR, EUC_TW, UTF8,
									 MULE_INTERNAL, SJIS, BIG5, GBK, UHC,
									 JOHAB, OTHER};

	while (MB_CHARACTERSET[i] != characterset_code &&
		   OTHER != MB_CHARACTERSET[i])
	{
		i++;
	}
	return (MB_CHARACTERSET[i]);
}
#endif
/*
 * pg_CS_code
 *
 * Translate a client-encoding name into a character set code.
 *
 * Every CS_Table name that occurs as a substring of characterset_string is
 * a candidate; the longest candidate wins (on equal length the later table
 * entry wins), so that e.g. "WIN1250" is preferred over the shorter "WIN".
 *
 * Returns the code of the best match, or OTHER when nothing matches or
 * characterset_string is NULL.
 */
int
pg_CS_code(const unsigned char *characterset_string)
{
	int			i,
				c = -1;
	size_t		len = 0;

	if (characterset_string == NULL)
		return OTHER;

	for (i = 0; CS_Table[i].code != OTHER; i++)
	{
		if (strstr((const char *) characterset_string,
				   (const char *) CS_Table[i].name))
		{
			size_t		namelen = strlen((const char *) CS_Table[i].name);

			if (namelen >= len)
			{
				len = namelen;
				c = CS_Table[i].code;
			}
		}
	}

	/*
	 * No table entry matched: report the OTHER code itself rather than the
	 * index of the sentinel entry.
	 */
	if (c < 0)
		c = OTHER;
	return (c);
}
unsigned char *
pg_CS_name(int characterset_code)
{
int i;
for (i = 0; CS_Table[i].code != OTHER; i++)
{
if (CS_Table[i].code == characterset_code)
return CS_Table[i].name;
}
return ("OTHER");
}
/*
 * pg_CS_stat
 *
 * Advance the multibyte scanning state by one byte.
 *
 *	stat				state returned for the previous byte (0 at the start
 *						of a string)
 *	character			the current byte value
 *	characterset_code	character set code selecting the encoding rules
 *
 * Returns the state for the current byte.  A value >= 2 marks a byte of a
 * multibyte character that still has bytes to follow; the callers in this
 * file count a character when the state is < 2 and compare bytes only when
 * the state is 0.  A NUL byte resets the incoming state to 0 before the
 * encoding rules are applied.  Unknown character sets always yield 0.
 */
int
pg_CS_stat(int stat, unsigned int character, int characterset_code)
{
	if (character == 0)
		stat = 0;
	switch (characterset_code)
	{
		case UTF8:
			{
				/*
				 * NOTE(review): the final byte of a UTF-8 sequence ends up
				 * with state 0 here, whereas the other encodings report 1
				 * for their final byte - confirm whether that is intended.
				 */
				if (stat < 2 &&
					character >= 0x80)
				{
					if (character >= 0xfc)
						stat = 6;
					else if (character >= 0xf8)
						stat = 5;
					else if (character >= 0xf0)
						stat = 4;
					else if (character >= 0xe0)
						stat = 3;
					else if (character >= 0xc0)
						stat = 2;
				}
				else if (stat > 2 &&
						 character > 0x7f)
					stat--;
				else
					stat = 0;
			}
			break;

			/* Shift-JIS Support. */
		case SJIS:
			{
				if (stat < 2 &&
					character > 0x80 &&
					!(character > 0x9f &&
					  character < 0xe0))
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/* Chinese Big5 Support. */
		case BIG5:
			{
				if (stat < 2 &&
					character > 0xA0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/* Chinese GBK Support. */
		case GBK:
			{
				if (stat < 2 &&
					character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/* Korean UHC Support. */
		case UHC:
			{
				if (stat < 2 &&
					character > 0x7F)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/* EUC_JP Support */
		case EUC_JP:
			{
				if (stat < 3 &&
					character == 0x8f)	/* JIS X 0212 */
					stat = 3;
				else if (stat != 2 &&
						 (character == 0x8e ||
						  character > 0xa0))	/* Half Katakana HighByte &
												 * Kanji HighByte */
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/* EUC_CN, EUC_KR, JOHAB Support */
		case EUC_CN:
		case EUC_KR:
		case JOHAB:
			{
				if (stat < 2 &&
					character > 0xa0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

		case EUC_TW:
			{
				if (stat < 4 &&
					character == 0x8e)
					stat = 4;
				else if (stat == 4 &&
						 character > 0xa0)
					stat = 3;
				else if ((stat == 3 ||
						  stat < 2) &&
						 character > 0xa0)
					stat = 2;
				else if (stat == 2)
					stat = 1;
				else
					stat = 0;
			}
			break;

			/*
			 * Chinese GB18030 support. Added by Bill Huang
			 * <bhuang@redhat.com> <bill_huanghb@ybb.ne.jp>
			 */
		case GB18030:
			{
				if (stat < 2 && character > 0x80)
					stat = 2;
				else if (stat == 2)	/* was "stat = 2": always true */
				{
					/* a digit as second byte starts a four-byte sequence */
					if (character >= 0x30 && character <= 0x39)
						stat = 3;
					else
						stat = 1;
				}
				else if (stat == 3)	/* was "stat = 3": unreachable */
				{
					/* stay in state 3 until the closing digit byte */
					if (character >= 0x30 && character <= 0x39)
						stat = 1;
					else
						stat = 3;
				}
				else
					stat = 0;
			}
			break;

		default:
			{
				stat = 0;
			}
			break;
	}
	return stat;
}
/*
 * pg_mbschr
 *
 * Multibyte-aware counterpart of strchr(): walk string byte by byte while
 * tracking the pg_CS_stat() state for character set csc, and return a
 * pointer to the first byte that equals character while the state is 0.
 * Returns NULL when no such byte exists before the terminating NUL.
 */
unsigned char *
pg_mbschr(int csc, const unsigned char *string, unsigned int character)
{
	const unsigned char *cursor = string;
	int			state = 0;

	while (*cursor)
	{
		state = pg_CS_stat(state, (unsigned char) *cursor, csc);
		if (state == 0 && *cursor == character)
			return (unsigned char *) cursor;
		cursor++;
	}
	return NULL;
}
/*
 * pg_mbslen
 *
 * Count the characters (not bytes) in the NUL-terminated string for
 * character set csc.  A byte contributes to the count only when the
 * pg_CS_stat() state after it is below 2.
 */
int
pg_mbslen(int csc, const unsigned char *string)
{
	const unsigned char *p = string;
	int			count = 0;
	int			state = 0;

	while (*p != 0)
	{
		state = pg_CS_stat(state, (unsigned int) *p, csc);
		if (state < 2)
			count++;
		p++;
	}
	return count;
}
/*
 * pg_mbsinc
 *
 * Return a pointer advanced past the character that starts at current.
 * The step is the pg_CS_stat() state produced for the first byte from an
 * initial state of 0, with 0 treated as a one-byte step.  Returns NULL
 * when current already points at the terminating NUL.
 */
unsigned char *
pg_mbsinc(int csc, const unsigned char *current)
{
	int			step;

	if (*current == 0)
		return NULL;

	step = (int) pg_CS_stat(0, *current, csc);
	if (step == 0)
		step = 1;
	return ((unsigned char *) current + step);
}
/*
 * CC_lookup_cs_new
 *
 * Query the client encoding with "select pg_client_encoding()".
 *
 * Returns a strdup()'ed copy of the value in row 0, column 0 of the
 * result (the caller owns it), or NULL when the query yields no result or
 * no value.
 */
static char *
CC_lookup_cs_new(ConnectionClass *self)
{
	QResultClass *res;
	char	   *value;
	char	   *copy = NULL;

	res = CC_send_query(self, "select pg_client_encoding()", NULL, CLEAR_RESULT_ON_ABORT);
	if (!res)
		return NULL;

	value = QR_get_value_backend_row(res, 0, 0);
	if (value)
		copy = strdup(value);
	QR_Destructor(res);
	return copy;
}
/*
 * CC_lookup_cs_old
 *
 * Query the client encoding with "Show Client_Encoding" on a temporary
 * statement.  When the statement completes with SQL_SUCCESS_WITH_INFO, the
 * encoding name is taken from the sixth whitespace-separated word of the
 * accompanying message.
 *
 * Returns a strdup()'ed copy of that word (the caller owns it), or NULL
 * when the statement cannot be allocated or no encoding could be parsed.
 */
static char *
CC_lookup_cs_old(ConnectionClass *self)
{
	char	   *encstr = NULL;
	HSTMT		hstmt;
	RETCODE		result;

	result = PGAPI_AllocStmt(self, &hstmt);
	if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO))
		return encstr;

	result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS);
	if (result == SQL_SUCCESS_WITH_INFO)
	{
		char		sqlState[8],
					errormsg[128],
					enc[32];

		/*
		 * The field width keeps the parsed word within enc[] (31 chars plus
		 * the terminating NUL); an unbounded "%s" could overflow it, since
		 * the message buffer is larger than enc[].
		 */
		if (PGAPI_Error(NULL, NULL, hstmt, sqlState, NULL, errormsg,
						sizeof(errormsg), NULL) == SQL_SUCCESS &&
			sscanf(errormsg, "%*s %*s %*s %*s %*s %31s", enc) > 0)
			encstr = strdup(enc);
	}
	PGAPI_FreeStmt(hstmt, SQL_DROP);
	return encstr;
}
/*
 * CC_lookup_characterset
 *
 * Determine the connection's client encoding and store it in
 * self->client_encoding (name) and self->ccsc (character set code).
 *
 * The encoding name is fetched with CC_lookup_cs_old() for servers older
 * than 7.2 and with CC_lookup_cs_new() otherwise.  Any previously stored
 * name is freed.  On non-Unicode WIN32 builds, when no name was stored
 * before, the ANSI code page may override the server value by issuing
 * "set client_encoding".  When the resolved code's table name differs from
 * the reported name, a CONN_VALUE_OUT_OF_RANGE error is recorded on the
 * connection.  When no name could be obtained, SQL_ASCII is assumed.
 */
void
CC_lookup_characterset(ConnectionClass *self)
{
	char	   *encstr;
	static char *func = "CC_lookup_characterset";

	mylog("%s: entering...\n", func);
	if (PG_VERSION_LT(self, 7.2))
		encstr = CC_lookup_cs_old(self);
	else
		encstr = CC_lookup_cs_new(self);

	/* NB: on WIN32 non-Unicode builds the "else" below pairs with this "if" */
	if (self->client_encoding)
		free(self->client_encoding);
#ifndef UNICODE_SUPPORT
#ifdef WIN32
	else
	{
		const char *wenc = NULL;

		switch (GetACP())
		{
			case 932:
				wenc = "SJIS";
				break;
			case 936:
				wenc = "GBK";
				break;
			case 949:
				wenc = "UHC";
				break;
			case 950:
				wenc = "BIG5";
				break;
		}

		/*
		 * encstr is NULL when the lookup failed; treat that as "differs"
		 * instead of handing a NULL pointer to stricmp().
		 */
		if (wenc && (!encstr || stricmp(encstr, wenc)))
		{
			QResultClass *res;
			char		query[64];

			sprintf(query, "set client_encoding to '%s'", wenc);
			res = CC_send_query(self, query, NULL, CLEAR_RESULT_ON_ABORT);
			if (res)
			{
				self->client_encoding = strdup(wenc);
				self->ccsc = pg_CS_code(self->client_encoding);
				QR_Destructor(res);
				free(encstr);
				return;
			}
		}
	}
#endif   /* WIN32 */
#endif   /* UNICODE_SUPPORT */

	if (encstr)
	{
		/* ownership of encstr passes to the connection */
		self->client_encoding = encstr;
		self->ccsc = pg_CS_code(encstr);
		qlog("    [ Client encoding = '%s' (code = %d) ]\n", self->client_encoding, self->ccsc);
		if (stricmp(pg_CS_name(self->ccsc), encstr))
		{
			qlog(" Client encoding = '%s' and %s\n", self->client_encoding, pg_CS_name(self->ccsc));
			self->errornumber = CONN_VALUE_OUT_OF_RANGE;
			self->errormsg = "client encoding mismatch";
		}
	}
	else
	{
		self->ccsc = SQL_ASCII;
		self->client_encoding = NULL;
	}
}
/*
 * encoded_str_constr
 *
 * Initialise an encoded_str cursor over str for character set ccsc.  The
 * position starts at -1, so that the pre-increment in encoded_nextchar()
 * yields the byte at index 0 first, and the multibyte state starts at 0.
 */
void encoded_str_constr(encoded_str *encstr, int ccsc, const char *str)
{
	encstr->encstr = str;
	encstr->ccsc = ccsc;
	encstr->ccst = 0;
	encstr->pos = -1;
}
/*
 * encoded_nextchar
 *
 * Advance the cursor by one byte, update the multibyte state (ccst) for
 * that byte and return the byte as read from the string.
 *
 * The value handed to pg_CS_stat() is narrowed through unsigned char so
 * that a byte >= 0x80 read through a signed char is not sign-extended into
 * a value far outside the 0..0xff range that pg_CS_stat() compares
 * against.  The returned value itself is unchanged.
 */
int encoded_nextchar(encoded_str *encstr)
{
	int			chr;

	chr = encstr->encstr[++encstr->pos];
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned char) chr, encstr->ccsc);
	return chr;
}
/*
 * encoded_byte_check
 *
 * Move the cursor to the absolute position abspos, update the multibyte
 * state (ccst) for the byte found there and return the byte as read from
 * the string.
 *
 * The value handed to pg_CS_stat() is narrowed through unsigned char so
 * that a byte >= 0x80 read through a signed char is not sign-extended into
 * a value far outside the 0..0xff range that pg_CS_stat() compares
 * against.  The returned value itself is unchanged.
 */
int encoded_byte_check(encoded_str *encstr, int abspos)
{
	int			chr;

	chr = encstr->encstr[encstr->pos = abspos];
	encstr->ccst = pg_CS_stat(encstr->ccst, (unsigned char) chr, encstr->ccsc);
	return chr;
}