Remove dead encoding-conversion functions.

The code for conversions SQL_ASCII <-> MULE_INTERNAL and
SQL_ASCII <-> UTF8 was unreachable, because we long ago changed
the wrapper functions pg_do_encoding_conversion() et al so that
they have hard-wired behaviors for conversions involving SQL_ASCII.
(At least some of those fast paths date back to 2002, though it
looks like we may not have been totally consistent about this until
later.)  Given the lack of complaints, nobody is dissatisfied with
this state of affairs.  Hence, let's just remove the unreachable code.

Also, change CREATE CONVERSION so that it rejects attempts to
define such conversions.  Since we consider that SQL_ASCII represents
lack of knowledge about the encoding in use, such a conversion would
be semantically dubious even if it were reachable.

Adjust a couple of regression test cases that had randomly decided
to rely on these conversion functions rather than any other ones.

Discussion: https://postgr.es/m/41163.1559156593@sss.pgh.pa.us
This commit is contained in:
Tom Lane 2019-07-05 14:17:27 -04:00
parent ef777cb093
commit 0ab1a2e39b
18 changed files with 44 additions and 269 deletions

View File

@ -1896,7 +1896,11 @@ RESET client_encoding;
<para>
If the client character set is defined as <literal>SQL_ASCII</literal>,
encoding conversion is disabled, regardless of the server's character
set. Just as for the server, use of <literal>SQL_ASCII</literal> is unwise
set. (However, if the server's character set is
not <literal>SQL_ASCII</literal>, the server will still check that
incoming data is valid for that encoding; so the net effect is as
though the client character set were the same as the server's.)
Just as for the server, use of <literal>SQL_ASCII</literal> is unwise
unless you are working with all-ASCII data.
</para>
</sect2>

View File

@ -2496,18 +2496,6 @@
</thead>
<tbody>
<row>
<entry><literal>ascii_to_mic</literal></entry>
<entry><literal>SQL_ASCII</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>ascii_to_utf8</literal></entry>
<entry><literal>SQL_ASCII</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>big5_to_euc_tw</literal></entry>
<entry><literal>BIG5</literal></entry>
@ -2778,12 +2766,6 @@
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>mic_to_ascii</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>SQL_ASCII</literal></entry>
</row>
<row>
<entry><literal>mic_to_big5</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
@ -2904,12 +2886,6 @@
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>utf8_to_ascii</literal></entry>
<entry><literal>UTF8</literal></entry>
<entry><literal>SQL_ASCII</literal></entry>
</row>
<row>
<entry><literal>utf8_to_big5</literal></entry>
<entry><literal>UTF8</literal></entry>

View File

@ -28,12 +28,15 @@ CREATE [ DEFAULT ] CONVERSION <replaceable>name</replaceable>
<para>
<command>CREATE CONVERSION</command> defines a new conversion between
character set encodings. Also, conversions that
are marked <literal>DEFAULT</literal> can be used for automatic encoding
conversion between
client and server. For this purpose, two conversions, from encoding A to
B <emphasis>and</emphasis> from encoding B to A, must be defined.
</para>
two character set encodings.
</para>
<para>
Conversions that are marked <literal>DEFAULT</literal> can be used for
automatic encoding conversion between client and server. To support that
usage, two conversions, from encoding A to B <emphasis>and</emphasis>
from encoding B to A, must be defined.
</para>
<para>
To be able to create a conversion, you must have <literal>EXECUTE</literal> privilege
@ -122,6 +125,13 @@ conv_proc(
<refsect1 id="sql-createconversion-notes">
<title>Notes</title>
<para>
Neither the source nor the destination encoding can
be <literal>SQL_ASCII</literal>, as the server's behavior for cases
involving the <literal>SQL_ASCII</literal> <quote>encoding</quote> is
hard-wired.
</para>
<para>
Use <command>DROP CONVERSION</command> to remove user-defined conversions.
</para>

View File

@ -72,6 +72,18 @@ CreateConversionCommand(CreateConversionStmt *stmt)
errmsg("destination encoding \"%s\" does not exist",
to_encoding_name)));
/*
* We consider conversions to or from SQL_ASCII to be meaningless. (If
* you wish to change this, note that pg_do_encoding_conversion() and its
* sister functions have hard-wired fast paths for any conversion in which
* the source or target encoding is SQL_ASCII, so that an encoding
* conversion function declared for such a case will never be used.)
*/
if (from_encoding == PG_SQL_ASCII || to_encoding == PG_SQL_ASCII)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("encoding conversion to or from \"SQL_ASCII\" is not supported")));
/*
* Check the existence of the conversion function. Function name could be
* a qualified name.

View File

@ -132,51 +132,6 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
}
/*
 * pg_ascii2mic: SQL_ASCII ---> MULE_INTERNAL
 *
 * SQL_ASCII is normally tolerant of bytes with the high bit set, but no
 * MULE_INTERNAL equivalent can be chosen for them, so they are rejected
 * here.  Zero bytes are rejected as well.  Both cases are reported through
 * report_invalid_encoding(), passing the offending position and the number
 * of bytes still unprocessed.
 *
 * Every accepted byte is copied unchanged.  A terminating '\0' is stored
 * after the last copied byte, so "p" must have room for len + 1 bytes.
 */
void
pg_ascii2mic(const unsigned char *l, unsigned char *p, int len)
{
	for (; len > 0; l++, len--)
	{
		int			ch = *l;

		if (ch == 0 || IS_HIGHBIT_SET(ch))
			report_invalid_encoding(PG_SQL_ASCII, (const char *) l, len);

		*p++ = ch;
	}

	*p = '\0';
}
/*
 * pg_mic2ascii: MULE_INTERNAL ---> SQL_ASCII
 *
 * Only plain 7-bit, non-zero bytes can be carried over.  Anything else is
 * reported through report_untranslatable_char() as a MULE_INTERNAL
 * character with no SQL_ASCII equivalent, passing the offending position
 * and the number of bytes still unprocessed.
 *
 * Every accepted byte is copied unchanged.  A terminating '\0' is stored
 * after the last copied byte, so "p" must have room for len + 1 bytes.
 */
void
pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len)
{
	for (; len > 0; mic++, len--)
	{
		int			ch = *mic;

		if (ch == 0 || IS_HIGHBIT_SET(ch))
			report_untranslatable_char(PG_MULE_INTERNAL, PG_SQL_ASCII,
									   (const char *) mic, len);

		*p++ = ch;
	}

	*p = '\0';
}
/*
* latin2mic_with_table: a generic single byte charset encoding
* conversion from a local charset to the mule internal code.

View File

@ -14,9 +14,9 @@ top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = \
ascii_and_mic cyrillic_and_mic euc_cn_and_mic euc_jp_and_sjis \
cyrillic_and_mic euc_cn_and_mic euc_jp_and_sjis \
euc_kr_and_mic euc_tw_and_big5 latin2_and_win1250 latin_and_mic \
utf8_and_ascii utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \
utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \
utf8_and_sjis utf8_and_win utf8_and_uhc \

View File

@ -1,13 +0,0 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/ascii_and_mic/Makefile
#
# Per-module makefile for the ascii_and_mic conversion procedures.  It only
# sets module-specific variables and then includes the proc.mk file from
# the parent directory.
#
#-------------------------------------------------------------------------
# Location of this directory within the source tree.
subdir = src/backend/utils/mb/conversion_procs/ascii_and_mic
# Relative path from this directory back to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name and description; presumably consumed by proc.mk -- confirm
# there before relying on it.
NAME = ascii_and_mic
PGFILEDESC = "ascii <-> mic text conversions"
include $(srcdir)/../proc.mk

View File

@ -1,60 +0,0 @@
/*-------------------------------------------------------------------------
*
* ASCII and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/ascii_and_mic/ascii_and_mic.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(ascii_to_mic);
PG_FUNCTION_INFO_V1(mic_to_ascii);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * ascii_to_mic: conversion procedure for SQL_ASCII ---> MULE_INTERNAL
 *
 * Arguments 2, 3 and 4 are the source string, the destination buffer and
 * the source length.  CHECK_ENCODING_CONVERSION_ARGS is given the encoding
 * pair this procedure expects; the copying itself is done by
 * pg_ascii2mic().
 */
Datum
ascii_to_mic(PG_FUNCTION_ARGS)
{
	int			nbytes = PG_GETARG_INT32(4);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);

	CHECK_ENCODING_CONVERSION_ARGS(PG_SQL_ASCII, PG_MULE_INTERNAL);

	pg_ascii2mic(input, output, nbytes);

	PG_RETURN_VOID();
}
/*
 * mic_to_ascii: conversion procedure for MULE_INTERNAL ---> SQL_ASCII
 *
 * Arguments 2, 3 and 4 are the source string, the destination buffer and
 * the source length.  CHECK_ENCODING_CONVERSION_ARGS is given the encoding
 * pair this procedure expects; the copying itself is done by
 * pg_mic2ascii().
 */
Datum
mic_to_ascii(PG_FUNCTION_ARGS)
{
	int			nbytes = PG_GETARG_INT32(4);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SQL_ASCII);

	pg_mic2ascii(input, output, nbytes);

	PG_RETURN_VOID();
}

View File

@ -1,13 +0,0 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/utf8_and_ascii/Makefile
#
# Per-module makefile for the utf8_and_ascii conversion procedures.  It only
# sets module-specific variables and then includes the proc.mk file from
# the parent directory.
#
#-------------------------------------------------------------------------
# Location of this directory within the source tree.
subdir = src/backend/utils/mb/conversion_procs/utf8_and_ascii
# Relative path from this directory back to the top of the build tree.
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name and description; presumably consumed by proc.mk -- confirm
# there before relying on it.
NAME = utf8_and_ascii
PGFILEDESC = "utf8 <-> ascii text conversions"
include $(srcdir)/../proc.mk

View File

@ -1,62 +0,0 @@
/*-------------------------------------------------------------------------
*
* ASCII <--> UTF8
*
* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/utf8_and_ascii/utf8_and_ascii.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(ascii_to_utf8);
PG_FUNCTION_INFO_V1(utf8_to_ascii);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER -- source string length
* ) returns VOID;
* ----------
*/
/*
 * ascii_to_utf8: conversion procedure for SQL_ASCII ---> UTF8
 *
 * Arguments 2, 3 and 4 are the source string, the destination buffer and
 * the source length.  CHECK_ENCODING_CONVERSION_ARGS is given the encoding
 * pair this procedure expects.
 */
Datum
ascii_to_utf8(PG_FUNCTION_ARGS)
{
	int			nbytes = PG_GETARG_INT32(4);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);

	CHECK_ENCODING_CONVERSION_ARGS(PG_SQL_ASCII, PG_UTF8);

	/*
	 * Borrowing the MULE_INTERNAL helper looks odd, but all that is wanted
	 * here is a byte copy that rejects high-bit-set input.
	 */
	pg_ascii2mic(input, output, nbytes);

	PG_RETURN_VOID();
}
/*
 * utf8_to_ascii: conversion procedure for UTF8 ---> SQL_ASCII
 *
 * Arguments 2, 3 and 4 are the source string, the destination buffer and
 * the source length.  CHECK_ENCODING_CONVERSION_ARGS is given the encoding
 * pair this procedure expects.
 */
Datum
utf8_to_ascii(PG_FUNCTION_ARGS)
{
	int			nbytes = PG_GETARG_INT32(4);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);

	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SQL_ASCII);

	/*
	 * Borrowing the MULE_INTERNAL helper looks odd, but all that is wanted
	 * here is a byte copy that rejects high-bit-set input.  Note that the
	 * helper reports failures with MULE_INTERNAL, not UTF8, as the source
	 * encoding.
	 */
	pg_mic2ascii(input, output, nbytes);

	PG_RETURN_VOID();
}

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201907053
#define CATALOG_VERSION_NO 201907054
#endif

View File

@ -15,12 +15,6 @@
[
{ oid => '4400', descr => 'conversion for SQL_ASCII to MULE_INTERNAL',
conname => 'ascii_to_mic', conforencoding => 'PG_SQL_ASCII',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'ascii_to_mic' },
{ oid => '4401', descr => 'conversion for MULE_INTERNAL to SQL_ASCII',
conname => 'mic_to_ascii', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_SQL_ASCII', conproc => 'mic_to_ascii' },
{ oid => '4402', descr => 'conversion for KOI8R to MULE_INTERNAL',
conname => 'koi8_r_to_mic', conforencoding => 'PG_KOI8R',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'koi8r_to_mic' },
@ -165,12 +159,6 @@
{ oid => '4449', descr => 'conversion for MULE_INTERNAL to LATIN4',
conname => 'mic_to_iso_8859_4', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_LATIN4', conproc => 'mic_to_latin4' },
{ oid => '4450', descr => 'conversion for SQL_ASCII to UTF8',
conname => 'ascii_to_utf8', conforencoding => 'PG_SQL_ASCII',
contoencoding => 'PG_UTF8', conproc => 'ascii_to_utf8' },
{ oid => '4451', descr => 'conversion for UTF8 to SQL_ASCII',
conname => 'utf8_to_ascii', conforencoding => 'PG_UTF8',
contoencoding => 'PG_SQL_ASCII', conproc => 'utf8_to_ascii' },
{ oid => '4452', descr => 'conversion for BIG5 to UTF8',
conname => 'big5_to_utf8', conforencoding => 'PG_BIG5',
contoencoding => 'PG_UTF8', conproc => 'big5_to_utf8' },

View File

@ -10082,16 +10082,6 @@
prosrc => 'binary_upgrade_set_missing_value' },
# conversion functions
{ oid => '4300',
descr => 'internal conversion function for SQL_ASCII to MULE_INTERNAL',
proname => 'ascii_to_mic', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'ascii_to_mic',
probin => '$libdir/ascii_and_mic' },
{ oid => '4301',
descr => 'internal conversion function for MULE_INTERNAL to SQL_ASCII',
proname => 'mic_to_ascii', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_ascii',
probin => '$libdir/ascii_and_mic' },
{ oid => '4302',
descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'void',
@ -10324,16 +10314,6 @@
proname => 'mic_to_latin4', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin4',
probin => '$libdir/latin_and_mic' },
{ oid => '4350',
descr => 'internal conversion function for SQL_ASCII to UTF8',
proname => 'ascii_to_utf8', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'ascii_to_utf8',
probin => '$libdir/utf8_and_ascii' },
{ oid => '4351',
descr => 'internal conversion function for UTF8 to SQL_ASCII',
proname => 'utf8_to_ascii', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_ascii',
probin => '$libdir/utf8_and_ascii' },
{ oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
proname => 'big5_to_utf8', prolang => 'c', prorettype => 'void',
proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_utf8',

View File

@ -614,8 +614,6 @@ extern void report_untranslatable_char(int src_encoding, int dest_encoding,
extern void local2local(const unsigned char *l, unsigned char *p, int len,
int src_encoding, int dest_encoding, const unsigned char *tab);
extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len);
extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len);
extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
int lc, int encoding);
extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,

View File

@ -2713,7 +2713,7 @@ as 'select $1.f1 is not distinct from $2.f1 and $1.f2 is not distinct from $2.f2
create operator alter1.=(procedure = alter1.same, leftarg = alter1.ctype, rightarg = alter1.ctype);
create operator class alter1.ctype_hash_ops default for type alter1.ctype using hash as
operator 1 alter1.=(alter1.ctype, alter1.ctype);
create conversion alter1.ascii_to_utf8 for 'sql_ascii' to 'utf8' from ascii_to_utf8;
create conversion alter1.latin1_to_utf8 for 'latin1' to 'utf8' from iso8859_1_to_utf8;
create text search parser alter1.prs(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype);
create text search configuration alter1.cfg(parser = alter1.prs);
create text search template alter1.tmpl(init = dsimple_init, lexize = dsimple_lexize);
@ -2731,7 +2731,7 @@ alter operator alter1.=(alter1.ctype, alter1.ctype) set schema alter2;
alter function alter1.same(alter1.ctype, alter1.ctype) set schema alter2;
alter type alter1.ctype set schema alter1; -- no-op, same schema
alter type alter1.ctype set schema alter2;
alter conversion alter1.ascii_to_utf8 set schema alter2;
alter conversion alter1.latin1_to_utf8 set schema alter2;
alter text search parser alter1.prs set schema alter2;
alter text search configuration alter1.cfg set schema alter2;
alter text search template alter1.tmpl set schema alter2;
@ -2775,7 +2775,7 @@ drop cascades to type alter2.ctype
drop cascades to function alter2.same(alter2.ctype,alter2.ctype)
drop cascades to operator alter2.=(alter2.ctype,alter2.ctype)
drop cascades to operator family alter2.ctype_hash_ops for access method hash
drop cascades to conversion alter2.ascii_to_utf8
drop cascades to conversion alter2.latin1_to_utf8
drop cascades to text search parser alter2.prs
drop cascades to text search configuration alter2.cfg
drop cascades to text search template alter2.tmpl

View File

@ -394,7 +394,7 @@ WITH objects (type, name, args) AS (VALUES
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
('conversion', '{pg_catalog, ascii_to_mic}', '{}'),
('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object
@ -468,7 +468,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*,
cast | | | (bigint AS integer) | t
table constraint | addr_nsp | | a_chk on addr_nsp.gentable | t
domain constraint | addr_nsp | | domconstr on addr_nsp.gendomain | t
conversion | pg_catalog | ascii_to_mic | pg_catalog.ascii_to_mic | t
conversion | pg_catalog | koi8_r_to_mic | pg_catalog.koi8_r_to_mic | t
language | | plpgsql | plpgsql | t
schema | | addr_nsp | addr_nsp | t
operator class | pg_catalog | int4_ops | pg_catalog.int4_ops USING btree | t

View File

@ -1735,7 +1735,7 @@ create operator alter1.=(procedure = alter1.same, leftarg = alter1.ctype, right
create operator class alter1.ctype_hash_ops default for type alter1.ctype using hash as
operator 1 alter1.=(alter1.ctype, alter1.ctype);
create conversion alter1.ascii_to_utf8 for 'sql_ascii' to 'utf8' from ascii_to_utf8;
create conversion alter1.latin1_to_utf8 for 'latin1' to 'utf8' from iso8859_1_to_utf8;
create text search parser alter1.prs(start = prsd_start, gettoken = prsd_nexttoken, end = prsd_end, lextypes = prsd_lextype);
create text search configuration alter1.cfg(parser = alter1.prs);
@ -1756,7 +1756,7 @@ alter operator alter1.=(alter1.ctype, alter1.ctype) set schema alter2;
alter function alter1.same(alter1.ctype, alter1.ctype) set schema alter2;
alter type alter1.ctype set schema alter1; -- no-op, same schema
alter type alter1.ctype set schema alter2;
alter conversion alter1.ascii_to_utf8 set schema alter2;
alter conversion alter1.latin1_to_utf8 set schema alter2;
alter text search parser alter1.prs set schema alter2;
alter text search configuration alter1.cfg set schema alter2;
alter text search template alter1.tmpl set schema alter2;

View File

@ -164,7 +164,7 @@ WITH objects (type, name, args) AS (VALUES
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
('conversion', '{pg_catalog, ascii_to_mic}', '{}'),
('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object