Add PQdsplen(), which returns the "display length" of a character.

Some work is still needed:
- UTF-8 and MULE_INTERNAL always return 1
This commit is contained in:
Tatsuo Ishii 2004-03-15 10:41:26 +00:00
parent 1bc2d544b9
commit e8c3205037
6 changed files with 261 additions and 45 deletions

View File

@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.45 2003/11/29 19:52:02 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.46 2004/03/15 10:41:25 ishii Exp $
*/
#include "postgres.h"
@ -463,6 +463,13 @@ pg_mblen(const unsigned char *mbstr)
return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr));
}
/*
 * pg_dsplen
 *
 * Return the display length of the multibyte character that starts at
 * mbstr, using the dsplen routine registered in pg_wchar_table for the
 * current database encoding.
 */
int
pg_dsplen(const unsigned char *mbstr)
{
	return pg_wchar_table[DatabaseEncoding->encoding].dsplen(mbstr);
}
/* returns the length (counted as a wchar) of a multibyte string */
int
pg_mbstrlen(const unsigned char *mbstr)

View File

@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.35 2003/11/29 22:39:59 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.36 2004/03/15 10:41:25 ishii Exp $
*
* WIN1250 client encoding updated by Pavel Behal
*
@ -49,6 +49,12 @@ pg_ascii_mblen(const unsigned char *s)
return (1);
}
/*
 * Display length for SQL_ASCII: always 1, whatever byte s points at.
 */
static int
pg_ascii_dsplen(const unsigned char *s)
{
	(void) s;					/* the byte itself is never examined */
	return 1;
}
/*
* EUC
*/
@ -107,6 +113,22 @@ pg_euc_mblen(const unsigned char *s)
return (len);
}
/*
 * Generic EUC display length.
 *
 * A lead byte equal to SS2 or SS3, or any lead byte with the high bit set,
 * yields 2; every other lead byte yields 1.
 */
static int
pg_euc_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead == SS2 || lead == SS3 || (lead & 0x80))
		return 2;

	return 1;
}
/*
* EUC_JP
*/
@ -122,6 +144,22 @@ pg_eucjp_mblen(const unsigned char *s)
return (pg_euc_mblen(s));
}
/*
 * EUC_JP display length.
 *
 * Differs from the generic EUC rule only for SS2, which yields 1
 * (presumably because SS2 introduces half-width kana in EUC_JP -- TODO
 * confirm).  SS3 and any other lead byte with the high bit set yield 2;
 * everything else yields 1.
 */
static int
pg_eucjp_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead == SS2)
		return 1;

	if (lead == SS3 || (lead & 0x80))
		return 2;

	return 1;
}
/*
* EUC_KR
*/
@ -137,6 +175,12 @@ pg_euckr_mblen(const unsigned char *s)
return (pg_euc_mblen(s));
}
/*
 * EUC_KR display length: identical to the generic EUC rule, so simply
 * delegate to pg_euc_dsplen().
 */
static int
pg_euckr_dsplen(const unsigned char *s)
{
	return pg_euc_dsplen(s);
}
/*
* EUC_CN
*/
@ -191,6 +235,18 @@ pg_euccn_mblen(const unsigned char *s)
return (len);
}
/*
 * EUC_CN display length: 2 when the lead byte has its high bit set,
 * otherwise 1.
 */
static int
pg_euccn_dsplen(const unsigned char *s)
{
	return (*s & 0x80) ? 2 : 1;
}
/*
* EUC_TW
*/
@ -250,6 +306,22 @@ pg_euctw_mblen(const unsigned char *s)
return (len);
}
/*
 * EUC_TW display length.
 *
 * SS2, SS3 and any other lead byte with the high bit set all yield 2;
 * the remaining lead bytes yield 1.
 */
static int
pg_euctw_dsplen(const unsigned char *s)
{
	int			width = 1;

	if (*s == SS2 || *s == SS3)
		width = 2;
	else if (*s & 0x80)
		width = 2;

	return width;
}
/*
* JOHAB
*/
@ -265,6 +337,12 @@ pg_johab_mblen(const unsigned char *s)
return (pg_euc_mblen(s));
}
/*
 * JOHAB display length: handled by the generic EUC routine.
 */
static int
pg_johab_dsplen(const unsigned char *s)
{
	return pg_euc_dsplen(s);
}
/*
* convert UTF-8 string to pg_wchar (UCS-2)
* caller should allocate enough space for "to"
@ -333,6 +411,12 @@ pg_utf_mblen(const unsigned char *s)
return (len);
}
/*
 * pg_utf_dsplen
 *
 * Return the display length (number of character cells) of the UTF-8
 * encoded character starting at s.
 *
 * The sequence is decoded to a code point and classified: code points in
 * the East Asian Wide/Fullwidth ranges (the BMP ranges used by Markus
 * Kuhn's reference wcwidth()) take 2 cells, everything else takes 1.
 *
 * Only sequences of up to three bytes are decoded, matching the maximum
 * of 3 bytes that pg_wchar_table declares for this encoding.  Any other
 * lead byte, and any truncated or malformed sequence, yields 1, which is
 * what this function previously returned for every input.  The result is
 * therefore never less than 1.
 *
 * Decoding stops at the first byte that is not a continuation byte, so a
 * terminating NUL is never read past.
 */
static int
pg_utf_dsplen(const unsigned char *s)
{
	unsigned long ucs;
	int			nfollow;
	int			i;

	if ((*s & 0x80) == 0)
		return 1;				/* plain ASCII */

	if ((*s & 0xe0) == 0xc0)
	{
		ucs = *s & 0x1f;
		nfollow = 1;
	}
	else if ((*s & 0xf0) == 0xe0)
	{
		ucs = *s & 0x0f;
		nfollow = 2;
	}
	else
		return 1;				/* stray continuation or unsupported lead */

	for (i = 1; i <= nfollow; i++)
	{
		if ((s[i] & 0xc0) != 0x80)
			return 1;			/* truncated or malformed sequence */
		ucs = (ucs << 6) | (unsigned long) (s[i] & 0x3f);
	}

	if (ucs >= 0x1100 &&
		(ucs <= 0x115f ||		/* Hangul Jamo initial consonants */
		 ucs == 0x2329 || ucs == 0x232a ||	/* angle brackets */
		 (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) ||	/* CJK .. Yi */
		 (ucs >= 0xac00 && ucs <= 0xd7a3) ||	/* Hangul syllables */
		 (ucs >= 0xf900 && ucs <= 0xfaff) ||	/* CJK compatibility
												 * ideographs */
		 (ucs >= 0xfe30 && ucs <= 0xfe6f) ||	/* CJK compatibility forms */
		 (ucs >= 0xff00 && ucs <= 0xff60) ||	/* fullwidth forms */
		 (ucs >= 0xffe0 && ucs <= 0xffe6)))
		return 2;

	return 1;
}
/*
* convert mule internal code to pg_wchar
* caller should allocate enough space for "to"
@ -406,6 +490,12 @@ pg_mule_mblen(const unsigned char *s)
return (len);
}
/*
 * MULE_INTERNAL display length.
 *
 * XXX fix me!  Placeholder: reports 1 for every character without
 * looking at s.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
	(void) s;					/* not examined yet */
	return 1;
}
/*
* ISO8859-1
*/
@ -430,6 +520,12 @@ pg_latin1_mblen(const unsigned char *s)
return (1);
}
/*
 * Display length for the single-byte (ISO8859-1 style) encodings:
 * always 1, independent of the byte at s.
 */
static int
pg_latin1_dsplen(const unsigned char *s)
{
	(void) s;					/* the byte itself is never examined */
	return 1;
}
/*
* SJIS
*/
@ -453,6 +549,26 @@ pg_sjis_mblen(const unsigned char *s)
return (len);
}
/*
 * SJIS display length, decided from the lead byte alone:
 *	0xa1..0xdf			-> 1	(1 byte kana?)
 *	other bytes > 0x7f	-> 2	(kanji?)
 *	everything else		-> 1	(should be ASCII)
 */
static int
pg_sjis_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead >= 0xa1 && lead <= 0xdf)
		return 1;

	return (lead > 0x7f) ? 2 : 1;
}
/*
* Big5
*/
@ -472,6 +588,22 @@ pg_big5_mblen(const unsigned char *s)
return (len);
}
/*
 * Big5 display length: a lead byte above 0x7f yields 2, anything else
 * (should be ASCII) yields 1.
 */
static int
pg_big5_dsplen(const unsigned char *s)
{
	if (*s > 0x7f)
		return 2;

	return 1;
}
/*
* GBK
*/
@ -491,6 +623,22 @@ pg_gbk_mblen(const unsigned char *s)
return (len);
}
/*
 * GBK display length: 2 for a lead byte above 0x7f, 1 for the rest
 * (should be ASCII).
 */
static int
pg_gbk_dsplen(const unsigned char *s)
{
	const int	is_multibyte = (*s > 0x7f);

	return is_multibyte ? 2 : 1;
}
/*
* UHC
*/
@ -510,6 +658,22 @@ pg_uhc_mblen(const unsigned char *s)
return (len);
}
/*
 * UHC display length: lead bytes above 0x7f start a 2-byte character and
 * yield 2; everything else (should be ASCII) yields 1.
 */
static int
pg_uhc_dsplen(const unsigned char *s)
{
	return (*s > 0x7f) ? 2 : 1;
}
/*
* * GB18030
* * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
@ -535,42 +699,58 @@ pg_gb18030_mblen(const unsigned char *s)
return (len);
}
/*
 * GB18030 display length: ASCII lead bytes (<= 0x7f) yield 1, every
 * other lead byte yields 2.
 */
static int
pg_gb18030_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (lead > 0x7f)
		return 2;

	return 1;					/* ASCII */
}
/*
 * Per-encoding dispatch table.  The number in each row's trailing comment
 * is the encoding id the row belongs to.
 *
 * NOTE(review): as shown here the initializer lists every encoding twice:
 * first with three fields (mb2wchar_with_len, mblen, maxmblen) and then
 * with four (mb2wchar_with_len, mblen, dsplen, maxmblen).  Only the
 * four-field rows match a pg_wchar_tbl that has a dsplen member; the
 * three-field rows look like the superseded version of the same entries
 * -- confirm before relying on row positions.
 */
pg_wchar_tbl pg_wchar_table[] = {
{pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII */
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */
{pg_euccn2wchar_with_len, pg_euccn_mblen, 3}, /* 2; PG_EUC_CN */
{pg_euckr2wchar_with_len, pg_euckr_mblen, 3}, /* 3; PG_EUC_KR */
{pg_euctw2wchar_with_len, pg_euctw_mblen, 3}, /* 4; PG_EUC_TW */
{pg_johab2wchar_with_len, pg_johab_mblen, 3}, /* 5; PG_JOHAB */
{pg_utf2wchar_with_len, pg_utf_mblen, 3}, /* 6; PG_UNICODE */
{pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 7; PG_MULE_INTERNAL */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 8; PG_LATIN1 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 9; PG_LATIN2 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 10; PG_LATIN3 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 11; PG_LATIN4 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 12; PG_LATIN5 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 13; PG_LATIN6 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 14; PG_LATIN7 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 15; PG_LATIN8 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 16; PG_LATIN9 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 17; PG_LATIN10 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 18; PG_WIN1256 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 19; PG_TCVN */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 20; PG_WIN874 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 21; PG_KOI8 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; PG_WIN1251 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; PG_ALT */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 24; ISO-8859-5 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 25; ISO-8859-6 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 26; ISO-8859-7 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 27; ISO-8859-8 */
{0, pg_sjis_mblen, 2}, /* 28; PG_SJIS */
{0, pg_big5_mblen, 2}, /* 29; PG_BIG5 */
{0, pg_gbk_mblen, 2}, /* 30; PG_GBK */
{0, pg_uhc_mblen, 2}, /* 31; PG_UHC */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 32; PG_WIN1250 */
{0, pg_gb18030_mblen, 2} /* 33; PG_GB18030 */
{pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, 1}, /* 0; PG_SQL_ASCII */
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, 3}, /* 1; PG_EUC_JP */
{pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, 3}, /* 2; PG_EUC_CN */
{pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3}, /* 3; PG_EUC_KR */
{pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3}, /* 4; PG_EUC_TW */
{pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3}, /* 5; PG_JOHAB */
{pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 3}, /* 6; PG_UNICODE */
{pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 8; PG_LATIN1 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 9; PG_LATIN2 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 10; PG_LATIN3 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 11; PG_LATIN4 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 12; PG_LATIN5 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 13; PG_LATIN6 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 14; PG_LATIN7 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 15; PG_LATIN8 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 16; PG_LATIN9 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 17; PG_LATIN10 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 18; PG_WIN1256 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 19; PG_TCVN */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 20; PG_WIN874 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 21; PG_KOI8 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 22; PG_WIN1251 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 23; PG_ALT */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 24; ISO-8859-5 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 25; ISO-8859-6 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 26; ISO-8859-7 */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 27; ISO-8859-8 */
{0, pg_sjis_mblen, pg_sjis_dsplen, 2}, /* 28; PG_SJIS */
{0, pg_big5_mblen, pg_big5_dsplen,2}, /* 29; PG_BIG5 */
{0, pg_gbk_mblen, pg_gbk_dsplen, 2}, /* 30; PG_GBK */
{0, pg_uhc_mblen, pg_uhc_dsplen, 2}, /* 31; PG_UHC */
{pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1}, /* 32; PG_WIN1250 */
{0, pg_gb18030_mblen, pg_gb18030_dsplen, 2} /* 33; PG_GB18030 */
};
/* returns the byte length of a word for mule internal code */
@ -594,6 +774,20 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr)
((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
}
/*
 * pg_encoding_dsplen
 *
 * Return the display length of the multibyte character at mbstr under the
 * given encoding.
 *
 * After asserting PG_VALID_ENCODING(encoding), an encoding id that does
 * not index into pg_wchar_table falls back to the PG_SQL_ASCII entry
 * instead of reading outside the table.
 */
int
pg_encoding_dsplen(int encoding, const unsigned char *mbstr)
{
	int			idx = PG_SQL_ASCII;

	Assert(PG_VALID_ENCODING(encoding));

	if (encoding >= 0 &&
		encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
		idx = encoding;

	return pg_wchar_table[idx].dsplen(mbstr);
}
/*
* fetch maximum length of a char encoding
*/
@ -688,6 +882,3 @@ pg_database_encoding_max_length(void)
}
#endif

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.83 2004/03/14 04:25:17 tgl Exp $
* $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.84 2004/03/15 10:41:26 ishii Exp $
*/
#include "postgres_fe.h"
#include "common.h"
@ -410,7 +410,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query)
{
qidx[i] = qoffset;
scridx[i] = scroffset;
scroffset += 1; /* XXX fix me when we have screen width info */
scroffset += PQdsplen(&query[qoffset], pset.encoding);
qoffset += PQmblen(&query[qoffset], pset.encoding);
}
qidx[i] = qoffset;
@ -526,7 +526,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query)
scroffset = 0;
for (i = 0; i < msg.len; i += PQmblen(&msg.data[i], pset.encoding))
{
scroffset += 1; /* XXX fix me when we have screen width info */
scroffset += PQdsplen(&msg.data[i], pset.encoding);
}
/* Finish and emit the message. */

View File

@ -1,4 +1,4 @@
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.49 2003/11/29 22:41:04 pgsql Exp $ */
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.50 2004/03/15 10:41:26 ishii Exp $ */
#ifndef PG_WCHAR_H
#define PG_WCHAR_H
@ -248,11 +248,14 @@ typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
int len);
/* signature of a routine returning the byte length of the char at mbstr */
typedef int (*mblen_converter) (const unsigned char *mbstr);
/* signature of a routine returning the display length of the char at mbstr */
typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
/*
 * Per-encoding set of character-handling routines, plus the maximum number
 * of bytes a character can take in that encoding.
 */
typedef struct
{
mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
* string to a wchar */
mblen_converter mblen; /* returns the length of a multibyte char */
mbdisplaylen_converter dsplen; /* returns the display length of a multibyte char */
int maxmblen; /* max bytes for a char in this charset */
} pg_wchar_tbl;
@ -283,7 +286,9 @@ extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *wstr);
extern int pg_mblen(const unsigned char *mbstr);
/* display length of the multibyte char at mbstr, in the database encoding */
extern int pg_dsplen(const unsigned char *mbstr);
extern int pg_encoding_mblen(int encoding, const unsigned char *mbstr);
/* display length of the multibyte char at mbstr, in the given encoding */
extern int pg_encoding_dsplen(int encoding, const unsigned char *mbstr);
extern int pg_mule_mblen(const unsigned char *mbstr);
extern int pg_mic_mblen(const unsigned char *mbstr);
extern int pg_mbstrlen(const unsigned char *mbstr);

View File

@ -23,7 +23,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.104 2003/11/29 19:52:12 pgsql Exp $
* $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.105 2004/03/15 10:41:26 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -1095,6 +1095,16 @@ PQmblen(const unsigned char *s, int encoding)
return (pg_encoding_mblen(encoding, s));
}
/*
 * PQdsplen
 *
 * Report the display length of the multibyte character that begins at s,
 * interpreted in the specified encoding.  This is a thin wrapper that
 * forwards to pg_encoding_dsplen().
 */
int
PQdsplen(const unsigned char *s, int encoding)
{
	return pg_encoding_dsplen(encoding, s);
}
/*
* Get encoding id from environment variable PGCLIENTENCODING.
*/

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.102 2004/01/09 02:02:43 momjian Exp $
* $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.103 2004/03/15 10:41:26 ishii Exp $
*
*-------------------------------------------------------------------------
*/
@ -447,6 +447,9 @@ extern int lo_export(PGconn *conn, Oid lobjId, const char *filename);
/* Determine length of multibyte encoded char at *s */
extern int PQmblen(const unsigned char *s, int encoding);
/* Determine display length of multibyte encoded char at *s, in the given encoding */
extern int PQdsplen(const unsigned char *s, int encoding);
/* Get encoding id from environment variable PGCLIENTENCODING */
extern int PQenv2encoding(void);