postgresql/contrib/xml/pgxml.c
Bruce Momjian 92288a1cf9 Change made to elog:
o  Change all current CVS messages of NOTICE to WARNING.  We were going
to do this just before 7.3 beta but it has to be done now, as you will
see below.

o Change current INFO messages that should be controlled by
client_min_messages to NOTICE.

o Force remaining INFO messages, like from EXPLAIN, VACUUM VERBOSE, etc.
to always go to the client.

o Remove INFO from the client_min_messages options and add NOTICE.

Seems we do need three non-ERROR elog levels to handle the various
behaviors we need for these messages.

Regression passed.
2002-03-06 06:10:59 +00:00

348 lines
6.9 KiB
C

/********************************************************
* Interface code to parse an XML document using expat
********************************************************/
#include "postgres.h"
#include "fmgr.h"
#include "expat.h"
#include "pgxml.h"
/*
 * Memory management - we make expat use standard pg MM.
 *
 * This is the allocator callback table passed to XML_ParserCreate_MM().
 * pgxml_mhs_init() fills it with the palloc/repalloc/pfree wrappers defined
 * below, and must be called before a parser is created.
 */
XML_Memory_Handling_Suite mhs;
/* passthrough functions (palloc is a macro) */
/*
 * pgxml_palloc - malloc-style callback for expat.
 *
 * A real function is needed because a function pointer is stored in the
 * memory handling suite; the request is simply forwarded to palloc().
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
/*
 * pgxml_repalloc - realloc-style callback for expat.
 *
 * ptr:  chunk previously obtained from this suite, or NULL.
 * size: new size in bytes.
 *
 * realloc(NULL, size) is defined to behave like malloc(size), so a NULL
 * pointer is routed to palloc() rather than handed to repalloc(), which
 * expects an existing chunk.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
/*
 * pgxml_pfree - free-style callback for expat.
 *
 * free(NULL) is a no-op by contract, so NULL is filtered out here instead
 * of being passed on to pfree().  The function is void, so the call is a
 * plain statement rather than a "return <expr>;".
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
/*
 * pgxml_mhs_init - point the global memory handling suite at the
 * palloc-based wrappers.
 *
 * Called before each XML_ParserCreate_MM() so that every allocation expat
 * makes goes through pgxml_palloc / pgxml_repalloc / pgxml_pfree.
 */
static void
pgxml_mhs_init(void)
{
	mhs.malloc_fcn = pgxml_palloc;
	mhs.realloc_fcn = pgxml_repalloc;
	mhs.free_fcn = pgxml_pfree;
}
/*
 * pgxml_handler_init - placeholder for handler configuration.
 *
 * Currently a no-op; it is called by pgxml_parse() before the parser is
 * created.
 */
static void
pgxml_handler_init(void)
{
	/*
	 * This code should set up the relevant handlers from user-supplied
	 * settings. Quite how these settings are made is another matter :)
	 */
}
/*
 * pgxml_parse(document) - well-formedness check.
 *
 * Feeds the whole text argument to an expat parser in a single final chunk.
 * Returns true if expat accepts the document and false if it reports a
 * parse error.  No element or character handlers are installed here.
 */
PG_FUNCTION_INFO_V1(pgxml_parse);

Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
	text	   *doc = PG_GETARG_TEXT_P(0);	/* document buffer */
	int32		doclen = VARSIZE(doc) - VARHDRSZ;
	XML_Parser	parser;
	int			parsed_ok;

	pgxml_mhs_init();
	pgxml_handler_init();

	parser = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (parser == NULL)
	{
		elog(ERROR, "pgxml: Could not create expat parser");
		PG_RETURN_NULL();		/* seems appropriate if we couldn't parse */
	}

	/*
	 * A failed parse is reported only through the boolean result.  The
	 * diagnostic below is intentionally left disabled:
	 *
	 * elog(WARNING, "Parse error at line %d:%s",
	 * XML_GetCurrentLineNumber(p),
	 * XML_ErrorString(XML_GetErrorCode(p)));
	 */
	parsed_ok = XML_Parse(parser, (char *) VARDATA(doc), doclen, 1);

	/* The parser is released on both outcomes. */
	XML_ParserFree(parser);

	if (!parsed_ok)
		PG_RETURN_BOOL(false);

	PG_RETURN_BOOL(true);
}
/* XPath handling functions */
/* XPath support here is for a very skeletal kind of XPath!
It was easy to program though... */
/*
 * build_xpath_results - core routine that parses a document and collects
 * the text of every element matching pathstr.
 *
 * doc:     the XML document.
 * pathstr: the path expression to match (not NUL-terminated on input; a
 *          terminated copy is made for the handlers).
 *
 * Returns a palloc'd XPath_Results on success.  Its resbuf member is a
 * separately palloc'd buffer of the document's size into which the matched
 * text is concatenated; the caller must pfree both resbuf and the struct.
 * Returns NULL if the document fails to parse, in which case everything
 * allocated here (including the result buffer) has already been released.
 *
 * The functions called by the user manipulate the returned result set in
 * various ways.
 */
static XPath_Results *
build_xpath_results(text *doc, text *pathstr)
{
	XPath_Results *xpr;
	char	   *res;
	pgxml_udata *udata;
	XML_Parser	p;
	int32		docsize;
	int32		pathlen;
	int			parsed_ok;

	docsize = VARSIZE(doc) - VARHDRSZ;
	pathlen = VARSIZE(pathstr) - VARHDRSZ;

	xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
	memset((void *) xpr, 0, sizeof(XPath_Results));
	xpr->rescount = 0;

	/* res isn't going to be the real return type, it is just a buffer */
	res = (char *) palloc(docsize);
	memset((void *) res, 0, docsize);
	xpr->resbuf = res;

	/* Per-parse state shared with the expat handlers via user data. */
	udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
	memset((void *) udata, 0, sizeof(pgxml_udata));
	udata->currentpath[0] = '\0';
	udata->textgrab = 0;

	/* NUL-terminated copy of the path, so the handlers can use str*() */
	udata->path = (char *) palloc(pathlen + 1);
	memcpy(udata->path, VARDATA(pathstr), pathlen);
	udata->path[pathlen] = '\0';

	udata->resptr = res;
	udata->reslen = 0;
	udata->xpres = xpr;

	/* Now fire up the parser */
	pgxml_mhs_init();
	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
	{
		elog(ERROR, "pgxml: Could not create expat parser");
		/* Fallback cleanup in case control returns from elog(). */
		pfree(udata->path);
		pfree(udata);
		pfree(res);
		pfree(xpr);
		return NULL;
	}
	XML_SetUserData(p, (void *) udata);

	/* Set the handlers */
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
	XML_SetCharacterDataHandler(p, pgxml_charhandler);

	/*
	 * Parse the whole document as one final chunk.  A diagnostic for parse
	 * errors is intentionally left disabled:
	 *
	 * elog(WARNING, "Parse error at line %d:%s",
	 * XML_GetCurrentLineNumber(p),
	 * XML_ErrorString(XML_GetErrorCode(p)));
	 */
	parsed_ok = XML_Parse(p, (char *) VARDATA(doc), docsize, 1);

	/* Parser and handler state are no longer needed on either outcome. */
	XML_ParserFree(p);
	pfree(udata->path);
	pfree(udata);

	if (!parsed_ok)
	{
		/* Release the result buffer too; it was previously leaked here. */
		pfree(res);
		pfree(xpr);
		return NULL;
	}

	return xpr;
}
/*
 * pgxml_xpath(document, pathstr, index) - return one matched text result.
 *
 * document: the XML document to search.
 * pathstr:  the path expression handed to build_xpath_results().
 * index:    1-based position of the wanted result.
 *
 * Returns the selected result as text.  Returns SQL NULL when the document
 * cannot be parsed or when index is outside 1..rescount (including zero
 * and negative values).
 */
PG_FUNCTION_INFO_V1(pgxml_xpath);

Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
	/* called as pgxml_xpath(document,pathstr, index) for the moment */
	XPath_Results *xpresults;
	text	   *restext;
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	text	   *t2 = PG_GETARG_TEXT_P(1);	/* path expression */
	int32		pos = PG_GETARG_INT32(2);	/* 1-based result index */
	int32		ind;

	xpresults = build_xpath_results(t, t2);

	/*
	 * This needs to be changed depending on the mechanism for returning
	 * our set of results.
	 */
	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
		PG_RETURN_NULL();

	/*
	 * Validate the 1-based position before converting it, so that neither a
	 * non-positive index nor the subtraction itself can produce an
	 * out-of-range array subscript.  Release the result set on this path
	 * as well.
	 */
	if (pos < 1 || pos > xpresults->rescount)
	{
		pfree(xpresults->resbuf);
		pfree(xpresults);
		PG_RETURN_NULL();
	}
	ind = pos - 1;

	/* Copy the chosen slice of the shared buffer into a fresh text value. */
	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;

	pfree(xpresults->resbuf);
	pfree(xpresults);
	PG_RETURN_TEXT_P(restext);
}
/*
 * pgxml_pathcompare - decide whether text grabbing should start or stop for
 * the current element path.
 *
 * If the wanted path does not occur anywhere in currentpath and a grab is
 * in progress, the grab is ended and the collected text is recorded as a
 * result.  If the wanted path occurs and the match is anchored at the end
 * of currentpath, grabbing is switched on, provided that either
 *   - the wanted path starts with '/' and the match is at the very start
 *     of currentpath, or
 *   - the wanted path does not start with '/' and the match is immediately
 *     preceded by a '/'.
 *
 * NOTE(review): strstr() reports only the first occurrence, so a wanted
 * path that appears earlier in currentpath as well as at its end is not
 * treated as anchored - confirm whether that is intended.
 */
static void
pgxml_pathcompare(void *userData)
{
	char	   *matchpos;

	matchpos = strstr(UD->currentpath, UD->path);

	if (matchpos == NULL)
	{							/* Should we have more logic here ? */
		if (UD->textgrab)
		{
			UD->textgrab = 0;
			pgxml_finalisegrabbedtext(userData);
		}
		return;
	}

	/*
	 * OK, we have a match of some sort. It only counts if it is anchored to
	 * the *end* of the string.  Indexing by strlen(UD->path) cannot overrun
	 * currentpath, because strstr() found the whole of path inside it.
	 */
	if (matchpos[strlen(UD->path)] != '\0')
		return;

	if (UD->path[0] == '/')
	{
		/* A path with a leading '/' must match from the start. */
		if (matchpos == UD->currentpath)
			UD->textgrab = 1;
	}
	else if (matchpos > UD->currentpath && matchpos[-1] == '/')
	{
		/*
		 * Otherwise the match must begin on an element boundary.  The
		 * pointer comparison keeps matchpos[-1] inside currentpath when
		 * the match sits at offset 0.
		 */
		UD->textgrab = 1;
	}
}
/*
 * pgxml_starthandler - expat start-element callback.
 *
 * Appends "/<name>" to the current path when the result still fits within
 * MAXPATHLENGTH; otherwise emits a warning and leaves the path unchanged.
 * Unless text is already being grabbed, the updated path is then compared
 * with the wanted path.  The attribute list is not used.
 */
static void
pgxml_starthandler(void *userData, const XML_Char * name,
				   const XML_Char ** atts)
{
	if ((strlen(name) + strlen(UD->currentpath)) <= MAXPATHLENGTH - 2)
	{
		strcat(UD->currentpath, "/");
		strcat(UD->currentpath, name);
	}
	else
		elog(WARNING, "Path too long");

	/*
	 * While a grab is in progress nothing is done for nested elements.
	 * Depending on user preference, should we "reconstitute" the element
	 * into the result text?
	 */
	if (!UD->textgrab)
		pgxml_pathcompare(userData);
}
/*
 * pgxml_endhandler - expat end-element callback.
 *
 * Removes the last "/<name>" component from the current path if it matches
 * the element being closed; a mismatch is reported with a warning and the
 * path is left alone.  When a grab is in progress the shortened path is
 * compared again with the wanted path.
 */
static void
pgxml_endhandler(void *userData, const XML_Char * name)
{
	/* Locate the final component of the current path. */
	char	   *lastsep = strrchr(UD->currentpath, '/');

	if (lastsep == NULL)
	{
		elog(ERROR, "There's a problem...");
		lastsep = UD->currentpath;
	}

	if (strcmp(name, lastsep + 1) == 0)
		*lastsep = '\0';		/* Chop that element off the end */
	else
		elog(WARNING, "Wanted [%s], got [%s]", lastsep, name);	/* unmatched
																 * entry, so do
																 * nothing */

	if (UD->textgrab)
		pgxml_pathcompare(userData);
}
/*
 * pgxml_charhandler - expat character-data callback.
 *
 * While a grab is in progress, appends the len bytes at s to the result
 * buffer and advances both the write pointer and the running length of the
 * current result.  Otherwise does nothing.
 *
 * NOTE(review): no capacity check is made against the result buffer here;
 * verify that the grabbed text can never exceed the buffer's size.
 */
static void
pgxml_charhandler(void *userData, const XML_Char * s, int len)
{
	if (!UD->textgrab || len <= 0)
		return;

	memcpy(UD->resptr, s, len);
	UD->resptr += len;
	UD->reslen += len;
}
/* Should I be using PG list types here? */
/*
 * pgxml_finalisegrabbedtext - record the text grabbed so far as one result.
 *
 * The result starts reslen bytes before the current write pointer.  Its
 * start and length are stored in the next free slot of the result set, the
 * slot count is bumped, and the running length is reset for the next grab.
 * All results therefore sit back to back in one buffer, but each one's
 * boundaries are known.
 *
 * NOTE(review): the slot index is not checked against the capacity of the
 * results/reslens arrays - confirm the bound in their declaration.
 */
static void
pgxml_finalisegrabbedtext(void *userData)
{
	/* In resptr/reslen, we have a single result. */
	UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
	UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
	UD->xpres->rescount += 1;

	UD->reslen = 0;
}