postgresql/contrib/xml2/xpath.c

846 lines
20 KiB
C
Raw Normal View History

/*
2010-09-20 22:08:53 +02:00
* contrib/xml2/xpath.c
*
* Parser interface for DOM-based parser (libxml) rather than
* stream-based SAX-type parser
*/
#include "postgres.h"
#include "access/htup_details.h"
#include "executor/spi.h"
#include "fmgr.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/xml.h"
/* libxml includes */
#include <libxml/xpath.h>
#include <libxml/tree.h>
#include <libxml/xmlmemory.h>
#include <libxml/xmlerror.h>
#include <libxml/parserInternals.h>
PG_MODULE_MAGIC;
/* exported for use by xslt_proc.c */
PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
/* workspace for pgxml_xpath() */
typedef struct
{
xmlDocPtr doctree;
xmlXPathContextPtr ctxt;
xmlXPathObjectPtr res;
} xpath_workspace;
/* local declarations */
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
xmlChar *toptagname, xmlChar *septagname,
xmlChar *plainsep);
static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
xmlChar *septag, xmlChar *plainsep);
static xmlChar *pgxml_texttoxmlchar(text *textstring);
static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath,
xpath_workspace *workspace);
static void cleanup_workspace(xpath_workspace *workspace);
/*
* Initialize for xml parsing.
*
* As with the underlying pg_xml_init function, calls to this MUST be followed
* by a PG_TRY block that guarantees that pg_xml_done is called.
*/
PgXmlErrorContext *
pgxml_parser_init(PgXmlStrictness strictness)
{
PgXmlErrorContext *xmlerrcxt;
/* Set up error handling (we share the core's error handler) */
xmlerrcxt = pg_xml_init(strictness);
/* Note: we're assuming an elog cannot be thrown by the following calls */
/* Initialize libxml */
xmlInitParser();
xmlSubstituteEntitiesDefault(1);
xmlLoadExtDtdDefaultValue = 1;
return xmlerrcxt;
}
/*
* Returns true if document is well-formed
*
* Note: this has been superseded by a core function. We still have to
* have it in the contrib module so that existing SQL-level references
* to the function won't fail; but in normal usage with up-to-date SQL
* definitions for the contrib module, this won't be called.
*/
PG_FUNCTION_INFO_V1(xml_is_well_formed);
Datum
xml_is_well_formed(PG_FUNCTION_ARGS)
{
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *t = PG_GETARG_TEXT_PP(0); /* document buffer */
bool result = false;
int32 docsize = VARSIZE_ANY_EXHDR(t);
xmlDocPtr doctree;
PgXmlErrorContext *xmlerrcxt;
xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
PG_TRY();
{
doctree = xmlParseMemory((char *) VARDATA_ANY(t), docsize);
result = (doctree != NULL);
if (doctree != NULL)
xmlFreeDoc(doctree);
}
PG_CATCH();
{
pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
PG_END_TRY();
pg_xml_done(xmlerrcxt, false);
PG_RETURN_BOOL(result);
}
/* Encodes special characters (<, >, &, " and \r) as XML entities */
PG_FUNCTION_INFO_V1(xml_encode_special_chars);
Datum
xml_encode_special_chars(PG_FUNCTION_ARGS)
{
text *tin = PG_GETARG_TEXT_PP(0);
2005-10-15 04:49:52 +02:00
text *tout;
xmlChar *ts,
*tt;
ts = pgxml_texttoxmlchar(tin);
tt = xmlEncodeSpecialChars(NULL, ts);
pfree(ts);
tout = cstring_to_text((char *) tt);
xmlFree(tt);
PG_RETURN_TEXT_P(tout);
}
/*
* Function translates a nodeset into a text representation
*
* iterates over each node in the set and calls xmlNodeDump to write it to
* an xmlBuffer -from which an xmlChar * string is returned.
*
* each representation is surrounded by <tagname> ... </tagname>
*
* plainsep is an ordinary (not tag) separator - if used, then nodes are
* cast to string as output method
*/
static xmlChar *
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
xmlChar *toptagname,
xmlChar *septagname,
xmlChar *plainsep)
{
xmlBufferPtr buf;
xmlChar *result;
int i;
buf = xmlBufferCreate();
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
{
xmlBufferWriteChar(buf, "<");
xmlBufferWriteCHAR(buf, toptagname);
xmlBufferWriteChar(buf, ">");
}
if (nodeset != NULL)
{
for (i = 0; i < nodeset->nodeNr; i++)
{
2004-08-29 07:07:03 +02:00
if (plainsep != NULL)
{
2004-08-29 07:07:03 +02:00
xmlBufferWriteCHAR(buf,
xmlXPathCastNodeToString(nodeset->nodeTab[i]));
2004-08-29 07:07:03 +02:00
/* If this isn't the last entry, write the plain sep. */
if (i < (nodeset->nodeNr) - 1)
xmlBufferWriteChar(buf, (char *) plainsep);
2004-08-29 07:07:03 +02:00
}
else
{
2004-08-29 07:07:03 +02:00
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
{
xmlBufferWriteChar(buf, "<");
xmlBufferWriteCHAR(buf, septagname);
xmlBufferWriteChar(buf, ">");
}
xmlNodeDump(buf,
nodeset->nodeTab[i]->doc,
nodeset->nodeTab[i],
1, 0);
if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
{
xmlBufferWriteChar(buf, "</");
xmlBufferWriteCHAR(buf, septagname);
xmlBufferWriteChar(buf, ">");
}
}
}
}
if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
{
xmlBufferWriteChar(buf, "</");
xmlBufferWriteCHAR(buf, toptagname);
xmlBufferWriteChar(buf, ">");
}
result = xmlStrdup(buf->content);
xmlBufferFree(buf);
return result;
}
/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
* into the libxml2 representation
*/
static xmlChar *
pgxml_texttoxmlchar(text *textstring)
{
return (xmlChar *) text_to_cstring(textstring);
}
/* Publicly visible XPath functions */
/*
* This is a "raw" xpath function. Check that it returns child elements
* properly
*/
PG_FUNCTION_INFO_V1(xpath_nodeset);
Datum
xpath_nodeset(PG_FUNCTION_ARGS)
{
text *document = PG_GETARG_TEXT_PP(0);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
xmlChar *xpath;
text *xpres;
xmlXPathObjectPtr res;
xpath_workspace workspace;
xpath = pgxml_texttoxmlchar(xpathsupp);
res = pgxml_xpath(document, xpath, &workspace);
xpres = pgxml_result_to_text(res, toptag, septag, NULL);
cleanup_workspace(&workspace);
pfree(xpath);
2004-08-29 07:07:03 +02:00
if (xpres == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(xpres);
}
/*
* The following function is almost identical, but returns the elements in
* a list.
*/
PG_FUNCTION_INFO_V1(xpath_list);
Datum
xpath_list(PG_FUNCTION_ARGS)
{
text *document = PG_GETARG_TEXT_PP(0);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
xmlChar *xpath;
text *xpres;
xmlXPathObjectPtr res;
xpath_workspace workspace;
xpath = pgxml_texttoxmlchar(xpathsupp);
res = pgxml_xpath(document, xpath, &workspace);
xpres = pgxml_result_to_text(res, NULL, NULL, plainsep);
cleanup_workspace(&workspace);
pfree(xpath);
2004-08-29 07:07:03 +02:00
if (xpres == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(xpres);
}
PG_FUNCTION_INFO_V1(xpath_string);
Datum
xpath_string(PG_FUNCTION_ARGS)
{
text *document = PG_GETARG_TEXT_PP(0);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
2004-08-29 07:07:03 +02:00
xmlChar *xpath;
int32 pathsize;
text *xpres;
xmlXPathObjectPtr res;
xpath_workspace workspace;
pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
2004-08-29 07:07:03 +02:00
/*
2005-10-15 04:49:52 +02:00
* We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
* at end
2004-08-29 07:07:03 +02:00
*/
/* We could try casting to string using the libxml function? */
2004-08-29 07:07:03 +02:00
xpath = (xmlChar *) palloc(pathsize + 9);
memcpy((char *) xpath, "string(", 7);
memcpy((char *) (xpath + 7), VARDATA_ANY(xpathsupp), pathsize);
2004-08-29 07:07:03 +02:00
xpath[pathsize + 7] = ')';
xpath[pathsize + 8] = '\0';
res = pgxml_xpath(document, xpath, &workspace);
xpres = pgxml_result_to_text(res, NULL, NULL, NULL);
cleanup_workspace(&workspace);
pfree(xpath);
2004-08-29 07:07:03 +02:00
if (xpres == NULL)
PG_RETURN_NULL();
PG_RETURN_TEXT_P(xpres);
}
PG_FUNCTION_INFO_V1(xpath_number);
Datum
xpath_number(PG_FUNCTION_ARGS)
{
text *document = PG_GETARG_TEXT_PP(0);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
2004-08-29 07:07:03 +02:00
xmlChar *xpath;
float4 fRes;
xmlXPathObjectPtr res;
xpath_workspace workspace;
xpath = pgxml_texttoxmlchar(xpathsupp);
res = pgxml_xpath(document, xpath, &workspace);
pfree(xpath);
if (res == NULL)
2004-08-29 07:07:03 +02:00
PG_RETURN_NULL();
fRes = xmlXPathCastToNumber(res);
cleanup_workspace(&workspace);
if (xmlXPathIsNaN(fRes))
2004-08-29 07:07:03 +02:00
PG_RETURN_NULL();
PG_RETURN_FLOAT4(fRes);
}
PG_FUNCTION_INFO_V1(xpath_bool);
Datum
xpath_bool(PG_FUNCTION_ARGS)
{
text *document = PG_GETARG_TEXT_PP(0);
Phase 2 of pgindent updates. Change pg_bsd_indent to follow upstream rules for placement of comments to the right of code, and remove pgindent hack that caused comments following #endif to not obey the general rule. Commit e3860ffa4dd0dad0dd9eea4be9cc1412373a8c89 wasn't actually using the published version of pg_bsd_indent, but a hacked-up version that tried to minimize the amount of movement of comments to the right of code. The situation of interest is where such a comment has to be moved to the right of its default placement at column 33 because there's code there. BSD indent has always moved right in units of tab stops in such cases --- but in the previous incarnation, indent was working in 8-space tab stops, while now it knows we use 4-space tabs. So the net result is that in about half the cases, such comments are placed one tab stop left of before. This is better all around: it leaves more room on the line for comment text, and it means that in such cases the comment uniformly starts at the next 4-space tab stop after the code, rather than sometimes one and sometimes two tabs after. Also, ensure that comments following #endif are indented the same as comments following other preprocessor commands such as #else. That inconsistency turns out to have been self-inflicted damage from a poorly-thought-through post-indent "fixup" in pgindent. This patch is much less interesting than the first round of indent changes, but also bulkier, so I thought it best to separate the effects. Discussion: https://postgr.es/m/E1dAmxK-0006EE-1r@gemulon.postgresql.org Discussion: https://postgr.es/m/30527.1495162840@sss.pgh.pa.us
2017-06-21 21:18:54 +02:00
text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
2004-08-29 07:07:03 +02:00
xmlChar *xpath;
int bRes;
xmlXPathObjectPtr res;
xpath_workspace workspace;
xpath = pgxml_texttoxmlchar(xpathsupp);
res = pgxml_xpath(document, xpath, &workspace);
pfree(xpath);
if (res == NULL)
2004-08-29 07:07:03 +02:00
PG_RETURN_BOOL(false);
bRes = xmlXPathCastToBoolean(res);
cleanup_workspace(&workspace);
PG_RETURN_BOOL(bRes);
}
/* Core function to evaluate XPath query */
static xmlXPathObjectPtr
pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
2004-08-29 07:07:03 +02:00
{
int32 docsize = VARSIZE_ANY_EXHDR(document);
PgXmlErrorContext *xmlerrcxt;
xmlXPathCompExprPtr comppath;
workspace->doctree = NULL;
workspace->ctxt = NULL;
workspace->res = NULL;
xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
PG_TRY();
{
workspace->doctree = xmlParseMemory((char *) VARDATA_ANY(document),
docsize);
if (workspace->doctree != NULL)
{
workspace->ctxt = xmlXPathNewContext(workspace->doctree);
workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
/* compile the path */
comppath = xmlXPathCompile(xpath);
if (comppath == NULL)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
"XPath Syntax Error");
/* Now evaluate the path expression. */
workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
xmlXPathFreeCompExpr(comppath);
}
}
PG_CATCH();
{
cleanup_workspace(workspace);
pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
PG_END_TRY();
if (workspace->res == NULL)
cleanup_workspace(workspace);
pg_xml_done(xmlerrcxt, false);
return workspace->res;
2004-08-29 07:07:03 +02:00
}
/* Clean up after processing the result of pgxml_xpath() */
static void
cleanup_workspace(xpath_workspace *workspace)
{
if (workspace->res)
xmlXPathFreeObject(workspace->res);
workspace->res = NULL;
if (workspace->ctxt)
xmlXPathFreeContext(workspace->ctxt);
workspace->ctxt = NULL;
if (workspace->doctree)
xmlFreeDoc(workspace->doctree);
workspace->doctree = NULL;
}
static text *
2004-08-29 07:07:03 +02:00
pgxml_result_to_text(xmlXPathObjectPtr res,
xmlChar *toptag,
xmlChar *septag,
xmlChar *plainsep)
{
2004-08-29 07:07:03 +02:00
xmlChar *xpresstr;
text *xpres;
if (res == NULL)
return NULL;
switch (res->type)
{
case XPATH_NODESET:
xpresstr = pgxmlNodeSetToText(res->nodesetval,
2004-08-29 07:07:03 +02:00
toptag,
septag, plainsep);
break;
case XPATH_STRING:
xpresstr = xmlStrdup(res->stringval);
break;
default:
elog(NOTICE, "unsupported XQuery result: %d", res->type);
xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
}
/* Now convert this result back to text */
xpres = cstring_to_text((char *) xpresstr);
/* Free various storage */
xmlFree(xpresstr);
return xpres;
}
/*
* xpath_table is a table function. It needs some tidying (as do the
* other functions here!
*/
PG_FUNCTION_INFO_V1(xpath_table);
2004-08-29 07:07:03 +02:00
Datum
xpath_table(PG_FUNCTION_ARGS)
{
/* Function parameters */
char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
/* SPI (input tuple) support */
2004-08-29 07:07:03 +02:00
SPITupleTable *tuptable;
HeapTuple spi_tuple;
TupleDesc spi_tupdesc;
/* Output tuple (tuplestore) support */
2004-08-29 07:07:03 +02:00
Tuplestorestate *tupstore = NULL;
TupleDesc ret_tupdesc;
HeapTuple ret_tuple;
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
AttInMetadata *attinmeta;
MemoryContext per_query_ctx;
MemoryContext oldcontext;
char **values;
xmlChar **xpaths;
char *pos;
const char *pathsep = "|";
2004-08-29 07:07:03 +02:00
int numpaths;
int ret;
uint64 proc;
2004-08-29 07:07:03 +02:00
int j;
2005-10-15 04:49:52 +02:00
int rownr; /* For issuing multiple rows from one original
* document */
bool had_values; /* To determine end of nodeset results */
2006-10-04 02:30:14 +02:00
StringInfoData query_buf;
PgXmlErrorContext *xmlerrcxt;
volatile xmlDocPtr doctree = NULL;
/* We only have a valid tuple description in table function mode */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
2004-08-29 07:07:03 +02:00
if (rsinfo->expectedDesc == NULL)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("xpath_table must be called as a table function")));
2004-08-29 07:07:03 +02:00
/*
* We want to materialise because it means that we don't have to carry
* libxml2 parser state between invocations of this function
*/
if (!(rsinfo->allowedModes & SFRM_Materialize))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("xpath_table requires Materialize mode, but it is not "
"allowed in this context")));
2005-10-15 04:49:52 +02:00
/*
* The tuplestore must exist in a higher context than this function call
* (per_query_ctx is used)
*/
2004-08-29 07:07:03 +02:00
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
oldcontext = MemoryContextSwitchTo(per_query_ctx);
2005-10-15 04:49:52 +02:00
/*
* Create the tuplestore - work_mem is the max in-memory size before a
* file is created on disk to hold it.
*/
tupstore =
tuplestore_begin_heap(rsinfo->allowedModes & SFRM_Materialize_Random,
false, work_mem);
2004-08-29 07:07:03 +02:00
MemoryContextSwitchTo(oldcontext);
2004-08-29 07:07:03 +02:00
/* get the requested return tuple description */
ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
/* must have at least one output column (for the pkey) */
if (ret_tupdesc->natts < 1)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("xpath_table must have at least one output column")));
2004-08-29 07:07:03 +02:00
/*
2005-10-15 04:49:52 +02:00
* At the moment we assume that the returned attributes make sense for the
* XPath specified (i.e. we trust the caller). It's not fatal if they get
2005-10-15 04:49:52 +02:00
* it wrong - the input function for the column type will raise an error
* if the path result can't be converted into the correct binary
* representation.
2004-08-29 07:07:03 +02:00
*/
2004-08-29 07:07:03 +02:00
attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
2004-08-29 07:07:03 +02:00
/* Set return mode and allocate value space. */
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setDesc = ret_tupdesc;
2004-08-29 07:07:03 +02:00
values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
/*
* Split XPaths. xpathset is a writable CString.
*
* Note that we stop splitting once we've done all needed for tupdesc
*/
2004-08-29 07:07:03 +02:00
numpaths = 0;
pos = xpathset;
while (numpaths < (ret_tupdesc->natts - 1))
2004-08-29 07:07:03 +02:00
{
xpaths[numpaths++] = (xmlChar *) pos;
2004-08-29 07:07:03 +02:00
pos = strstr(pos, pathsep);
if (pos != NULL)
{
*pos = '\0';
pos++;
}
else
break;
}
2004-08-29 07:07:03 +02:00
/* Now build query */
initStringInfo(&query_buf);
2004-08-29 07:07:03 +02:00
/* Build initial sql statement */
appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
2004-08-29 07:07:03 +02:00
pkeyfield,
xmlfield,
relname,
condition);
2004-08-29 07:07:03 +02:00
if ((ret = SPI_connect()) < 0)
elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
elog(ERROR, "xpath_table: SPI execution failed for query %s",
query_buf.data);
2004-08-29 07:07:03 +02:00
proc = SPI_processed;
tuptable = SPI_tuptable;
spi_tupdesc = tuptable->tupdesc;
/* Switch out of SPI context */
2004-08-29 07:07:03 +02:00
MemoryContextSwitchTo(oldcontext);
/*
* Check that SPI returned correct result. If you put a comma into one of
* the function parameters, this will catch it when the SPI query returns
* e.g. 3 columns.
*/
2004-08-29 07:07:03 +02:00
if (spi_tupdesc->natts != 2)
{
ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("expression returning multiple columns is not valid in parameter list"),
errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
2004-08-29 07:07:03 +02:00
}
/*
2010-07-06 21:19:02 +02:00
* Setup the parser. This should happen after we are done evaluating the
* query, in case it calls functions that set up libxml differently.
*/
xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
2004-08-29 07:07:03 +02:00
PG_TRY();
{
/* For each row i.e. document returned from SPI */
uint64 i;
for (i = 0; i < proc; i++)
{
char *pkey;
char *xmldoc;
xmlXPathContextPtr ctxt;
xmlXPathObjectPtr res;
xmlChar *resstr;
xmlXPathCompExprPtr comppath;
/* Extract the row data as C Strings */
spi_tuple = tuptable->vals[i];
pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
/*
* Clear the values array, so that not-well-formed documents
* return NULL in all columns. Note that this also means that
* spare columns will be NULL.
*/
for (j = 0; j < ret_tupdesc->natts; j++)
values[j] = NULL;
/* Insert primary key */
values[0] = pkey;
/* Parse the document */
if (xmldoc)
doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
2017-06-21 20:39:04 +02:00
else /* treat NULL as not well-formed */
doctree = NULL;
if (doctree == NULL)
2004-08-29 07:07:03 +02:00
{
/* not well-formed, so output all-NULL tuple */
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
tuplestore_puttuple(tupstore, ret_tuple);
heap_freetuple(ret_tuple);
}
else
{
/* New loop here - we have to deal with nodeset results */
rownr = 0;
do
2004-08-29 07:07:03 +02:00
{
/* Now evaluate the set of xpaths. */
had_values = false;
for (j = 0; j < numpaths; j++)
{
ctxt = xmlXPathNewContext(doctree);
ctxt->node = xmlDocGetRootElement(doctree);
2004-08-29 07:07:03 +02:00
/* compile the path */
comppath = xmlXPathCompile(xpaths[j]);
if (comppath == NULL)
xml_ereport(xmlerrcxt, ERROR,
ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
"XPath Syntax Error");
2004-08-29 07:07:03 +02:00
/* Now evaluate the path expression. */
res = xmlXPathCompiledEval(comppath, ctxt);
xmlXPathFreeCompExpr(comppath);
2004-08-29 07:07:03 +02:00
if (res != NULL)
2004-08-29 07:07:03 +02:00
{
switch (res->type)
{
case XPATH_NODESET:
/* We see if this nodeset has enough nodes */
if (res->nodesetval != NULL &&
rownr < res->nodesetval->nodeNr)
{
resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
had_values = true;
}
else
resstr = NULL;
break;
case XPATH_STRING:
resstr = xmlStrdup(res->stringval);
break;
default:
elog(NOTICE, "unsupported XQuery result: %d", res->type);
resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
}
/*
* Insert this into the appropriate column in the
* result tuple.
*/
values[j + 1] = (char *) resstr;
2004-08-29 07:07:03 +02:00
}
xmlXPathFreeContext(ctxt);
2004-08-29 07:07:03 +02:00
}
/* Now add the tuple to the output, if there is one. */
if (had_values)
{
ret_tuple = BuildTupleFromCStrings(attinmeta, values);
tuplestore_puttuple(tupstore, ret_tuple);
heap_freetuple(ret_tuple);
}
2004-08-29 07:07:03 +02:00
rownr++;
} while (had_values);
}
2004-08-29 07:07:03 +02:00
if (doctree != NULL)
xmlFreeDoc(doctree);
doctree = NULL;
2004-08-29 07:07:03 +02:00
if (pkey)
pfree(pkey);
if (xmldoc)
pfree(xmldoc);
}
}
PG_CATCH();
{
if (doctree != NULL)
xmlFreeDoc(doctree);
pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
PG_END_TRY();
if (doctree != NULL)
xmlFreeDoc(doctree);
pg_xml_done(xmlerrcxt, false);
2004-08-29 07:07:03 +02:00
tuplestore_donestoring(tupstore);
SPI_finish();
rsinfo->setResult = tupstore;
/*
* SFRM_Materialize mode expects us to return a NULL Datum. The actual
2005-10-15 04:49:52 +02:00
* tuples are in our tuplestore and passed back through rsinfo->setResult.
* rsinfo->setDesc is set to the tuple description that we actually used
* to build our tuples with, so the caller can verify we did what it was
* expecting.
2004-08-29 07:07:03 +02:00
*/
return (Datum) 0;
}