Fix up memory management problems in contrib/xml2.

Get rid of the code that attempted to funnel libxml2's memory allocations
into palloc.   We already knew from experience with the core xml datatype
that trying to do this is simply not reliable.  Unlike the core code, I
did not bother adding a lot of PG_TRY/PG_CATCH logic to try to ensure that
everything is cleaned up on error exit.  Hence, we might leak some memory
if one of these functions fails partway through.  Given the deprecated
status of this contrib module and the fact that errors partway through
the functions shouldn't be too common, it doesn't seem worth worrying about.

Also fix a separate bug in xpath_table, that it did the wrong things
if given a result tuple descriptor with less than 2 columns.  While
such a case isn't very useful in practice, we shouldn't fail or stomp
memory when it occurs.

Add some simple regression tests based on all the reported crash cases
that I have on hand.

This should be back-patched, but let's see if the buildfarm likes it first.
This commit is contained in:
Tom Lane 2010-02-28 21:31:57 +00:00
parent 7d7db18a68
commit 936c4af1c6
4 changed files with 253 additions and 57 deletions

View File

@ -1,4 +1,4 @@
# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.12 2008/05/08 16:49:37 tgl Exp $
# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.13 2010/02/28 21:31:57 tgl Exp $
MODULE_big = pgxml
@ -8,6 +8,7 @@ SHLIB_LINK += $(filter -lxslt, $(LIBS)) $(filter -lxml2, $(LIBS))
DATA_built = pgxml.sql
DATA = uninstall_pgxml.sql
REGRESS = xml2
ifdef USE_PGXS
PG_CONFIG = pg_config

View File

@ -0,0 +1,147 @@
--
-- first, define the functions. Turn off echoing so that expected file
-- does not depend on contents of pgxml.sql.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
select query_to_xml('select 1 as x',true,false,'');
query_to_xml
---------------------------------------------------------------
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+
<row> +
<x>1</x> +
</row> +
+
</table> +
(1 row)
select xslt_process( query_to_xml('select x from generate_series(1,5) as
x',true,false,'')::text,
$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes" />
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
<xsl:template match="comment()|processing-instruction()">
<xsl:copy />
</xsl:template>
</xsl:stylesheet>
$$::text);
xslt_process
---------------------------------------------------------------
<?xml version="1.0"?> +
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+
<row> +
<x>1</x> +
</row> +
+
<row> +
<x>2</x> +
</row> +
+
<row> +
<x>3</x> +
</row> +
+
<row> +
<x>4</x> +
</row> +
+
<row> +
<x>5</x> +
</row> +
+
</table> +
(1 row)
CREATE TABLE xpath_test (id integer NOT NULL, t xml);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
id
----
(0 rows)
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
id | doc
----+-----
1 | 1
(1 row)
DROP TABLE xpath_test;
CREATE TABLE xpath_test (id integer NOT NULL, t text);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
id
----
(0 rows)
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
id | doc
----+-----
1 | 1
(1 row)
create table articles (article_id integer, article_xml xml, date_entered date);
insert into articles (article_id, article_xml, date_entered)
values (2, '<article><author>test</author><pages>37</pages></article>', now());
SELECT * FROM
xpath_table('article_id',
'article_xml',
'articles',
'/article/author|/article/pages|/article/title',
'date_entered > ''2003-01-01'' ')
AS t(article_id integer, author text, page_count integer, title text);
article_id | author | page_count | title
------------+--------+------------+-------
2 | test | 37 |
(1 row)
-- this used to fail when invoked a second time
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
xslt_process
--------------
<aaa/> +
(1 row)
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
xslt_process
--------------
<aaa/> +
(1 row)
create table t1 (id integer, xml_data xml);
insert into t1 (id, xml_data)
values
(1, '<attributes><attribute name="attr_1">Some
Value</attribute></attributes>');
create index idx_xpath on t1 ( xpath_string
('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));

82
contrib/xml2/sql/xml2.sql Normal file
View File

@ -0,0 +1,82 @@
--
-- first, define the functions. Turn off echoing so that expected file
-- does not depend on contents of pgxml.sql.
--
SET client_min_messages = warning;
\set ECHO none
\i pgxml.sql
\set ECHO all
RESET client_min_messages;
select query_to_xml('select 1 as x',true,false,'');
select xslt_process( query_to_xml('select x from generate_series(1,5) as
x',true,false,'')::text,
$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes" />
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
<xsl:template match="comment()|processing-instruction()">
<xsl:copy />
</xsl:template>
</xsl:stylesheet>
$$::text);
CREATE TABLE xpath_test (id integer NOT NULL, t xml);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
DROP TABLE xpath_test;
CREATE TABLE xpath_test (id integer NOT NULL, t text);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
create table articles (article_id integer, article_xml xml, date_entered date);
insert into articles (article_id, article_xml, date_entered)
values (2, '<article><author>test</author><pages>37</pages></article>', now());
SELECT * FROM
xpath_table('article_id',
'article_xml',
'articles',
'/article/author|/article/pages|/article/title',
'date_entered > ''2003-01-01'' ')
AS t(article_id integer, author text, page_count integer, title text);
-- this used to fail when invoked a second time
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
create table t1 (id integer, xml_data xml);
insert into t1 (id, xml_data)
values
(1, '<attributes><attribute name="attr_1">Some
Value</attribute></attributes>');
create index idx_xpath on t1 ( xpath_string
('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));

View File

@ -1,5 +1,5 @@
/*
* $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.26 2010/02/28 19:51:37 tgl Exp $
* $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.27 2010/02/28 21:31:57 tgl Exp $
*
* Parser interface for DOM-based parser (libxml) rather than
* stream-based SAX-type parser
@ -42,10 +42,6 @@ void pgxml_parser_init(void);
/* local declarations */
static void *pgxml_palloc(size_t size);
static void *pgxml_repalloc(void *ptr, size_t size);
static void pgxml_pfree(void *ptr);
static char *pgxml_pstrdup(const char *string);
static void pgxml_errorHandler(void *ctxt, const char *msg,...);
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
@ -59,41 +55,10 @@ static xmlChar *pgxml_texttoxmlchar(text *textstring);
static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath);
/* Global variables */
static char *pgxml_errorMsg = NULL; /* overall error message */
/* memory handling passthrough functions (e.g. palloc, pstrdup are
currently macros, and the others might become so...) */
static void *
pgxml_palloc(size_t size)
{
/* elog(DEBUG1,"Alloc %d in CMC %p",size,CurrentMemoryContext); */
return palloc(size);
}
static void *
pgxml_repalloc(void *ptr, size_t size)
{
/* elog(DEBUG1,"ReAlloc in CMC %p",CurrentMemoryContext);*/
return repalloc(ptr, size);
}
static void
pgxml_pfree(void *ptr)
{
/* elog(DEBUG1,"Free in CMC %p",CurrentMemoryContext); */
pfree(ptr);
}
static char *
pgxml_pstrdup(const char *string)
{
return pstrdup(string);
}
/*
* The error handling function. This formats an error message and sets
* a flag - an ereport will be issued prior to return
@ -157,9 +122,6 @@ pgxml_parser_init(void)
pgxml_errorMsg = NULL;
xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
/* Replace libxml memory handling (DANGEROUS) */
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
/* Initialize libxml */
xmlInitParser();
@ -627,7 +589,7 @@ xpath_table(PG_FUNCTION_ARGS)
int j;
int rownr; /* For issuing multiple rows from one original
* document */
int had_values; /* To determine end of nodeset results */
bool had_values; /* To determine end of nodeset results */
StringInfoData query_buf;
/* We only have a valid tuple description in table function mode */
@ -654,7 +616,6 @@ xpath_table(PG_FUNCTION_ARGS)
* The tuplestore must exist in a higher context than this function call
* (per_query_ctx is used)
*/
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
oldcontext = MemoryContextSwitchTo(per_query_ctx);
@ -671,6 +632,12 @@ xpath_table(PG_FUNCTION_ARGS)
/* get the requested return tuple description */
ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
/* must have at least one output column (for the pkey) */
if (ret_tupdesc->natts < 1)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("xpath_table must have at least one output column")));
/*
* At the moment we assume that the returned attributes make sense for the
* XPath specififed (i.e. we trust the caller). It's not fatal if they get
@ -686,26 +653,27 @@ xpath_table(PG_FUNCTION_ARGS)
rsinfo->setDesc = ret_tupdesc;
values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
/* Split XPaths. xpathset is a writable CString. */
/* Note that we stop splitting once we've done all needed for tupdesc */
/*
* Split XPaths. xpathset is a writable CString.
*
* Note that we stop splitting once we've done all needed for tupdesc
*/
numpaths = 0;
pos = xpathset;
do
while (numpaths < (ret_tupdesc->natts - 1))
{
xpaths[numpaths] = (xmlChar *) pos;
xpaths[numpaths++] = (xmlChar *) pos;
pos = strstr(pos, pathsep);
if (pos != NULL)
{
*pos = '\0';
pos++;
}
numpaths++;
} while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)));
else
break;
}
/* Now build query */
initStringInfo(&query_buf);
@ -800,7 +768,7 @@ xpath_table(PG_FUNCTION_ARGS)
do
{
/* Now evaluate the set of xpaths. */
had_values = 0;
had_values = false;
for (j = 0; j < numpaths; j++)
{
ctxt = xmlXPathNewContext(doctree);
@ -813,10 +781,7 @@ xpath_table(PG_FUNCTION_ARGS)
{
xmlCleanupParser();
xmlFreeDoc(doctree);
elog_error("XPath Syntax Error", true);
PG_RETURN_NULL(); /* Keep compiler happy */
}
/* Now evaluate the path expression. */
@ -829,11 +794,12 @@ xpath_table(PG_FUNCTION_ARGS)
{
case XPATH_NODESET:
/* We see if this nodeset has enough nodes */
if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr))
if (res->nodesetval != NULL &&
rownr < res->nodesetval->nodeNr)
{
resstr =
xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
had_values = 1;
had_values = true;
}
else
resstr = NULL;