From 4ab6ebf3f4dc8182556dc23c49ee59e602a78f1c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Sat, 12 Sep 2009 22:13:12 +0000 Subject: [PATCH] Add Unicode support in PL/Python PL/Python now accepts Unicode objects where it previously only accepted string objects (for example, as return value). Unicode objects are converted to the PostgreSQL server encoding as necessary. This change is also necessary for future Python 3 support, which treats all strings as Unicode objects. Since this removes the error conditions that the plpython_unicode test file tested for, the alternative result files are no longer necessary. --- src/pl/plpython/expected/README | 4 - src/pl/plpython/expected/plpython_trigger.out | 27 ++++++ src/pl/plpython/expected/plpython_unicode.out | 59 +++++++------ .../plpython/expected/plpython_unicode_2.out | 45 ---------- .../plpython/expected/plpython_unicode_3.out | 45 ---------- src/pl/plpython/plpython.c | 82 ++++++++++++++++--- src/pl/plpython/sql/plpython_trigger.sql | 33 ++++++++ src/pl/plpython/sql/plpython_unicode.sql | 23 +++--- 8 files changed, 175 insertions(+), 143 deletions(-) delete mode 100644 src/pl/plpython/expected/plpython_unicode_2.out delete mode 100644 src/pl/plpython/expected/plpython_unicode_3.out diff --git a/src/pl/plpython/expected/README b/src/pl/plpython/expected/README index 3864c0b0c1..574062dd77 100644 --- a/src/pl/plpython/expected/README +++ b/src/pl/plpython/expected/README @@ -2,7 +2,3 @@ Guide to alternative expected files: plpython_error_2.out Python 2.2, 2.3, 2.4 plpython_error.out Python 2.5, 2.6 - -plpython_unicode_2.out Python 2.2 -plpython_unicode_3.out Python 2.3, 2.4 -plpython_unicode.out Python 2.5, 2.6 diff --git a/src/pl/plpython/expected/plpython_trigger.out b/src/pl/plpython/expected/plpython_trigger.out index 6be1c9dd0c..06a8645a35 100644 --- a/src/pl/plpython/expected/plpython_trigger.out +++ b/src/pl/plpython/expected/plpython_trigger.out @@ -342,6 +342,19 @@ ERROR: unexpected return value from 
trigger procedure DETAIL: Expected None, "OK", "SKIP", or "MODIFY". CONTEXT: PL/Python function "stupid3" DROP TRIGGER stupid_trigger3 ON trigger_test; +-- Unicode variant +CREATE FUNCTION stupid3u() RETURNS trigger +AS $$ + return u"foo" +$$ LANGUAGE plpythonu; +CREATE TRIGGER stupid_trigger3 +BEFORE UPDATE ON trigger_test +FOR EACH ROW EXECUTE PROCEDURE stupid3u(); +UPDATE trigger_test SET v = 'null' WHERE i = 0; +ERROR: unexpected return value from trigger procedure +DETAIL: Expected None, "OK", "SKIP", or "MODIFY". +CONTEXT: PL/Python function "stupid3u" +DROP TRIGGER stupid_trigger3 ON trigger_test; -- deleting the TD dictionary CREATE FUNCTION stupid4() RETURNS trigger AS $$ @@ -398,6 +411,20 @@ ERROR: key "a" found in TD["new"] does not exist as a column in the triggering CONTEXT: while modifying trigger row PL/Python function "stupid7" DROP TRIGGER stupid_trigger7 ON trigger_test; +-- Unicode variant +CREATE FUNCTION stupid7u() RETURNS trigger +AS $$ + TD["new"] = {u'a': 'foo', u'b': 'bar'} + return "MODIFY" +$$ LANGUAGE plpythonu; +CREATE TRIGGER stupid_trigger7 +BEFORE UPDATE ON trigger_test +FOR EACH ROW EXECUTE PROCEDURE stupid7u(); +UPDATE trigger_test SET v = 'null' WHERE i = 0; +ERROR: key "a" found in TD["new"] does not exist as a column in the triggering row +CONTEXT: while modifying trigger row +PL/Python function "stupid7u" +DROP TRIGGER stupid_trigger7 ON trigger_test; -- calling a trigger function directly SELECT stupid7(); ERROR: trigger functions can only be called as triggers diff --git a/src/pl/plpython/expected/plpython_unicode.out b/src/pl/plpython/expected/plpython_unicode.out index d3b6fd1db7..c4ab73fd24 100644 --- a/src/pl/plpython/expected/plpython_unicode.out +++ b/src/pl/plpython/expected/plpython_unicode.out @@ -4,42 +4,47 @@ CREATE TABLE unicode_test ( testvalue text NOT NULL ); -CREATE FUNCTION unicode_return_error() RETURNS text AS E' +CREATE FUNCTION unicode_return() RETURNS text AS E' return u"\\x80" ' LANGUAGE plpythonu; 
-CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E' +CREATE FUNCTION unicode_trigger() RETURNS trigger AS E' TD["new"]["testvalue"] = u"\\x80" return "MODIFY" ' LANGUAGE plpythonu; CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error(); -CREATE FUNCTION unicode_plan_error1() RETURNS text AS E' + FOR EACH ROW EXECUTE PROCEDURE unicode_trigger(); +CREATE FUNCTION unicode_plan1() RETURNS text AS E' plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) rv = plpy.execute(plan, [u"\\x80"], 1) return rv[0]["testvalue"] ' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan_error2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"]) -rv = plpy.execute(plan, u"\\x80", 1) -return rv[0]["testvalue1"] +CREATE FUNCTION unicode_plan2() RETURNS text AS E' +plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"]) +rv = plpy.execute(plan, ["foo", "bar"], 1) +return rv[0]["testvalue"] ' LANGUAGE plpythonu; -SELECT unicode_return_error(); -ERROR: PL/Python: could not create string representation of Python object -DETAIL: : 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while creating return value -PL/Python function "unicode_return_error" +SELECT unicode_return(); + unicode_return +---------------- + \u0080 +(1 row) + INSERT INTO unicode_test (testvalue) VALUES ('test'); -ERROR: PL/Python: could not create string representation of Python object -DETAIL: : 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while modifying trigger row -PL/Python function "unicode_trigger_error" -SELECT unicode_plan_error1(); -WARNING: PL/Python: : unrecognized error in PLy_spi_execute_plan -CONTEXT: PL/Python function "unicode_plan_error1" -ERROR: PL/Python: could not execute plan -DETAIL: : 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in 
range(128) -CONTEXT: PL/Python function "unicode_plan_error1" -SELECT unicode_plan_error2(); -ERROR: PL/Python: could not execute plan -DETAIL: : 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan_error2" +SELECT * FROM unicode_test; + testvalue +----------- + \u0080 +(1 row) + +SELECT unicode_plan1(); + unicode_plan1 +--------------- + \u0080 +(1 row) + +SELECT unicode_plan2(); + unicode_plan2 +--------------- + foobar +(1 row) + diff --git a/src/pl/plpython/expected/plpython_unicode_2.out b/src/pl/plpython/expected/plpython_unicode_2.out deleted file mode 100644 index 1f393fbef3..0000000000 --- a/src/pl/plpython/expected/plpython_unicode_2.out +++ /dev/null @@ -1,45 +0,0 @@ --- --- Unicode handling --- -CREATE TABLE unicode_test ( - testvalue text NOT NULL -); -CREATE FUNCTION unicode_return_error() RETURNS text AS E' -return u"\\x80" -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E' -TD["new"]["testvalue"] = u"\\x80" -return "MODIFY" -' LANGUAGE plpythonu; -CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error(); -CREATE FUNCTION unicode_plan_error1() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) -rv = plpy.execute(plan, [u"\\x80"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan_error2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"]) -rv = plpy.execute(plan, u"\\x80", 1) -return rv[0]["testvalue1"] -' LANGUAGE plpythonu; -SELECT unicode_return_error(); -ERROR: PL/Python: could not create string representation of Python object -DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: while creating return value -PL/Python function "unicode_return_error" -INSERT INTO unicode_test (testvalue) VALUES ('test'); -ERROR: PL/Python: could not 
create string representation of Python object -DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: while modifying trigger row -PL/Python function "unicode_trigger_error" -SELECT unicode_plan_error1(); -WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan -CONTEXT: PL/Python function "unicode_plan_error1" -ERROR: PL/Python: could not execute plan -DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan_error1" -SELECT unicode_plan_error2(); -ERROR: PL/Python: could not execute plan -DETAIL: exceptions.UnicodeError: ASCII encoding error: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan_error2" diff --git a/src/pl/plpython/expected/plpython_unicode_3.out b/src/pl/plpython/expected/plpython_unicode_3.out deleted file mode 100644 index 620f9f5790..0000000000 --- a/src/pl/plpython/expected/plpython_unicode_3.out +++ /dev/null @@ -1,45 +0,0 @@ --- --- Unicode handling --- -CREATE TABLE unicode_test ( - testvalue text NOT NULL -); -CREATE FUNCTION unicode_return_error() RETURNS text AS E' -return u"\\x80" -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_trigger_error() RETURNS trigger AS E' -TD["new"]["testvalue"] = u"\\x80" -return "MODIFY" -' LANGUAGE plpythonu; -CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error(); -CREATE FUNCTION unicode_plan_error1() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) -rv = plpy.execute(plan, [u"\\x80"], 1) -return rv[0]["testvalue"] -' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan_error2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"]) -rv = plpy.execute(plan, u"\\x80", 1) -return rv[0]["testvalue1"] -' LANGUAGE plpythonu; -SELECT unicode_return_error(); -ERROR: PL/Python: could not create string representation of Python object -DETAIL: 
exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while creating return value -PL/Python function "unicode_return_error" -INSERT INTO unicode_test (testvalue) VALUES ('test'); -ERROR: PL/Python: could not create string representation of Python object -DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: while modifying trigger row -PL/Python function "unicode_trigger_error" -SELECT unicode_plan_error1(); -WARNING: PL/Python: plpy.Error: unrecognized error in PLy_spi_execute_plan -CONTEXT: PL/Python function "unicode_plan_error1" -ERROR: PL/Python: could not execute plan -DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan_error1" -SELECT unicode_plan_error2(); -ERROR: PL/Python: could not execute plan -DETAIL: exceptions.UnicodeEncodeError: 'ascii' codec can't encode character u'\x80' in position 0: ordinal not in range(128) -CONTEXT: PL/Python function "unicode_plan_error2" diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c index 909eab033b..ae898385b5 100644 --- a/src/pl/plpython/plpython.c +++ b/src/pl/plpython/plpython.c @@ -1,7 +1,7 @@ /********************************************************************** * plpython.c - python as a procedural language for PostgreSQL * - * $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.128 2009/09/09 19:00:09 petere Exp $ + * $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.129 2009/09/12 22:13:12 petere Exp $ * ********************************************************************* */ @@ -54,6 +54,7 @@ typedef int Py_ssize_t; #include "executor/spi.h" #include "funcapi.h" #include "fmgr.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "parser/parse_type.h" @@ -238,6 +239,9 @@ static void 
*PLy_malloc0(size_t); static char *PLy_strdup(const char *); static void PLy_free(void *); +static PyObject*PLyUnicode_Str(PyObject *unicode); +static char *PLyUnicode_AsString(PyObject *unicode); + /* sub handlers for functions and triggers */ static Datum PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *); static HeapTuple PLy_trigger_handler(FunctionCallInfo fcinfo, PLyProcedure *); @@ -474,13 +478,19 @@ PLy_trigger_handler(FunctionCallInfo fcinfo, PLyProcedure *proc) { char *srv; - if (!PyString_Check(plrv)) + if (PyString_Check(plrv)) + srv = PyString_AsString(plrv); + else if (PyUnicode_Check(plrv)) + srv = PLyUnicode_AsString(plrv); + else + { ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("unexpected return value from trigger procedure"), errdetail("Expected None or a string."))); + srv = NULL; /* keep compiler quiet */ + } - srv = PyString_AsString(plrv); if (pg_strcasecmp(srv, "SKIP") == 0) rv = NULL; else if (pg_strcasecmp(srv, "MODIFY") == 0) @@ -572,15 +582,24 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata, for (i = 0; i < natts; i++) { + char *plattstr; + platt = PyList_GetItem(plkeys, i); - if (!PyString_Check(platt)) + if (PyString_Check(platt)) + plattstr = PyString_AsString(platt); + else if (PyUnicode_Check(platt)) + plattstr = PLyUnicode_AsString(platt); + else + { ereport(ERROR, (errmsg("TD[\"new\"] dictionary key at ordinal position %d is not a string", i))); - attn = SPI_fnumber(tupdesc, PyString_AsString(platt)); + plattstr = NULL; /* keep compiler quiet */ + } + attn = SPI_fnumber(tupdesc, plattstr); if (attn == SPI_ERROR_NOATTRIBUTE) ereport(ERROR, (errmsg("key \"%s\" found in TD[\"new\"] does not exist as a column in the triggering row", - PyString_AsString(platt)))); + plattstr))); atti = attn - 1; plval = PyDict_GetItem(plntup, platt); @@ -1942,7 +1961,10 @@ PLyObject_ToDatum(PLyTypeInfo *info, Assert(plrv != Py_None); - plrv_so = PyObject_Str(plrv); + if (PyUnicode_Check(plrv)) + plrv_so 
= PLyUnicode_Str(plrv); + else + plrv_so = PyObject_Str(plrv); if (!plrv_so) PLy_elog(ERROR, "could not create string representation of Python object"); @@ -2562,10 +2584,16 @@ PLy_spi_prepare(PyObject *self, PyObject *args) Form_pg_type typeStruct; optr = PySequence_GetItem(list, i); - if (!PyString_Check(optr)) + if (PyString_Check(optr)) + sptr = PyString_AsString(optr); + else if (PyUnicode_Check(optr)) + sptr = PLyUnicode_AsString(optr); + else + { ereport(ERROR, (errmsg("plpy.prepare: type name at ordinal position %d is not a string", i))); - sptr = PyString_AsString(optr); + sptr = NULL; /* keep compiler quiet */ + } /******************************************************** * Resolve argument type names and then look them up by @@ -2670,7 +2698,7 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit) if (list != NULL) { - if (!PySequence_Check(list) || PyString_Check(list)) + if (!PySequence_Check(list) || PyString_Check(list) || PyUnicode_Check(list)) { PLy_exception_set(PLy_exc_spi_error, "plpy.execute takes a sequence as its second argument"); return NULL; @@ -2714,7 +2742,10 @@ PLy_spi_execute_plan(PyObject *ob, PyObject *list, long limit) elem = PySequence_GetItem(list, j); if (elem != Py_None) { - so = PyObject_Str(elem); + if (PyUnicode_Check(elem)) + so = PLyUnicode_Str(elem); + else + so = PyObject_Str(elem); if (!so) PLy_elog(ERROR, "could not execute plan"); Py_DECREF(elem); @@ -3303,3 +3334,32 @@ PLy_free(void *ptr) { free(ptr); } + +/* + * Convert a Python unicode object to a Python string object in + * PostgreSQL server encoding. Reference ownership is passed to the + * caller. Returns NULL if the encoding fails, which callers must check. + */ +static PyObject* +PLyUnicode_Str(PyObject *unicode) +{ + /* + * This assumes that the PostgreSQL encoding names are acceptable + * to Python, but that appears to be the case. + */ + return PyUnicode_AsEncodedString(unicode, GetDatabaseEncodingName(), "strict"); +} + +/* + * Convert a Python unicode object to a C string in PostgreSQL server + * encoding.
The result is a palloc'd copy, so no Python object + * reference escapes. XXX: a NULL from PLyUnicode_Str is not checked here. + */ +static char * +PLyUnicode_AsString(PyObject *unicode) +{ + PyObject *o = PLyUnicode_Str(unicode); + char *rv = pstrdup(PyString_AsString(o)); /* copy before o's buffer is freed */ + Py_XDECREF(o); + return rv; +} diff --git a/src/pl/plpython/sql/plpython_trigger.sql b/src/pl/plpython/sql/plpython_trigger.sql index 385fa93bda..d6f441f827 100644 --- a/src/pl/plpython/sql/plpython_trigger.sql +++ b/src/pl/plpython/sql/plpython_trigger.sql @@ -159,6 +159,22 @@ UPDATE trigger_test SET v = 'null' WHERE i = 0; DROP TRIGGER stupid_trigger3 ON trigger_test; +-- Unicode variant + +CREATE FUNCTION stupid3u() RETURNS trigger +AS $$ + return u"foo" +$$ LANGUAGE plpythonu; + +CREATE TRIGGER stupid_trigger3 +BEFORE UPDATE ON trigger_test +FOR EACH ROW EXECUTE PROCEDURE stupid3u(); + +UPDATE trigger_test SET v = 'null' WHERE i = 0; + +DROP TRIGGER stupid_trigger3 ON trigger_test; + + -- deleting the TD dictionary CREATE FUNCTION stupid4() RETURNS trigger @@ -227,6 +243,23 @@ UPDATE trigger_test SET v = 'null' WHERE i = 0; DROP TRIGGER stupid_trigger7 ON trigger_test; +-- Unicode variant + +CREATE FUNCTION stupid7u() RETURNS trigger +AS $$ + TD["new"] = {u'a': 'foo', u'b': 'bar'} + return "MODIFY" +$$ LANGUAGE plpythonu; + +CREATE TRIGGER stupid_trigger7 +BEFORE UPDATE ON trigger_test +FOR EACH ROW EXECUTE PROCEDURE stupid7u(); + +UPDATE trigger_test SET v = 'null' WHERE i = 0; + +DROP TRIGGER stupid_trigger7 ON trigger_test; + + -- calling a trigger function directly SELECT stupid7(); diff --git a/src/pl/plpython/sql/plpython_unicode.sql b/src/pl/plpython/sql/plpython_unicode.sql index d2c8ee1bd8..6b9fac682a 100644 --- a/src/pl/plpython/sql/plpython_unicode.sql +++ b/src/pl/plpython/sql/plpython_unicode.sql @@ -6,32 +6,33 @@ CREATE TABLE unicode_test ( testvalue text NOT NULL ); -CREATE FUNCTION unicode_return_error() RETURNS text AS E' +CREATE FUNCTION unicode_return() RETURNS text AS E' return u"\\x80" ' LANGUAGE plpythonu; -CREATE FUNCTION
unicode_trigger_error() RETURNS trigger AS E' +CREATE FUNCTION unicode_trigger() RETURNS trigger AS E' TD["new"]["testvalue"] = u"\\x80" return "MODIFY" ' LANGUAGE plpythonu; CREATE TRIGGER unicode_test_bi BEFORE INSERT ON unicode_test - FOR EACH ROW EXECUTE PROCEDURE unicode_trigger_error(); + FOR EACH ROW EXECUTE PROCEDURE unicode_trigger(); -CREATE FUNCTION unicode_plan_error1() RETURNS text AS E' +CREATE FUNCTION unicode_plan1() RETURNS text AS E' plan = plpy.prepare("SELECT $1 AS testvalue", ["text"]) rv = plpy.execute(plan, [u"\\x80"], 1) return rv[0]["testvalue"] ' LANGUAGE plpythonu; -CREATE FUNCTION unicode_plan_error2() RETURNS text AS E' -plan = plpy.prepare("SELECT $1 AS testvalue1, $2 AS testvalue2", ["text", "text"]) -rv = plpy.execute(plan, u"\\x80", 1) -return rv[0]["testvalue1"] +CREATE FUNCTION unicode_plan2() RETURNS text AS E' +plan = plpy.prepare("SELECT $1 || $2 AS testvalue", ["text", u"text"]) +rv = plpy.execute(plan, ["foo", "bar"], 1) +return rv[0]["testvalue"] ' LANGUAGE plpythonu; -SELECT unicode_return_error(); +SELECT unicode_return(); INSERT INTO unicode_test (testvalue) VALUES ('test'); -SELECT unicode_plan_error1(); -SELECT unicode_plan_error2(); +SELECT * FROM unicode_test; +SELECT unicode_plan1(); +SELECT unicode_plan2();