postgresql/contrib/xml/pgxml.c

311 lines
7.0 KiB
C
Raw Normal View History

/********************************************************
* Interface code to parse an XML document using expat
********************************************************/
#include "postgres.h"
#include "fmgr.h"
#include "expat.h"
#include "pgxml.h"
/*
 * Memory management - we make expat use standard pg MM.
 *
 * This suite is filled in by pgxml_mhs_init() and its address is passed
 * to XML_ParserCreate_MM() whenever a parser is created in this file.
 */
XML_Memory_Handling_Suite mhs;
/* passthrough functions (palloc is a macro) */
/*
 * pgxml_palloc - allocation hook installed as mhs.malloc_fcn.
 *
 * Forwards the request to palloc and hands back whatever it returns.
 * A real function is needed so that its address can be stored in the
 * memory-handling suite.
 */
static void *pgxml_palloc(size_t size)
{
    void *chunk;

    chunk = palloc(size);
    return chunk;
}
/*
 * pgxml_repalloc - reallocation hook installed as mhs.realloc_fcn.
 *
 * ptr   chunk to resize, or NULL
 * size  requested new size in bytes
 *
 * Returns the (possibly moved) chunk.  A realloc-compatible function is
 * allowed to receive NULL and must then behave like an allocation;
 * repalloc expects a pointer that was previously handed out by palloc,
 * so the NULL case is routed to palloc instead.
 */
static void *pgxml_repalloc(void *ptr, size_t size)
{
    if (ptr == NULL)
        return palloc(size);
    return repalloc(ptr, size);
}
/*
 * pgxml_pfree - release hook installed as mhs.free_fcn.
 *
 * ptr  chunk to release, or NULL
 *
 * A free-compatible function may be called with NULL and must then do
 * nothing; pfree requires a valid chunk, so NULL is filtered out here.
 * The function is void, so it must not "return" the result of pfree.
 */
static void pgxml_pfree(void *ptr)
{
    if (ptr != NULL)
        pfree(ptr);
}
static void pgxml_mhs_init()
{
mhs.malloc_fcn = pgxml_palloc;
mhs.realloc_fcn = pgxml_repalloc;
mhs.free_fcn = pgxml_pfree;
}
/*
 * pgxml_handler_init - placeholder for configurable handler setup.
 *
 * Currently a no-op: the body contains no statements.  pgxml_parse()
 * calls it before creating its parser.
 */
static void pgxml_handler_init()
{
/* This code should set up the relevant handlers from user-supplied
settings. Quite how these settings are made is another matter :) */
}
/*
 * pgxml_parse(document) - SQL-callable well-formedness check.
 *
 * Feeds the whole text argument to a freshly created expat parser in a
 * single XML_Parse() call.  Returns true if the parser accepts the
 * document and false if it reports an error.  No element or character
 * handlers are installed, so nothing is extracted from the document.
 */
PG_FUNCTION_INFO_V1(pgxml_parse);
Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
    text *document = PG_GETARG_TEXT_P(0);           /* document buffer */
    int32 nbytes = VARSIZE(document) - VARHDRSZ;    /* payload length, header excluded */
    XML_Parser parser;
    int wellformed;

    pgxml_mhs_init();
    pgxml_handler_init();

    parser = XML_ParserCreate_MM(NULL, &mhs, NULL);
    if (parser == NULL) {
        elog(ERROR, "pgxml: Could not create expat parser");
        PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
    }

    /*
     * The trailing 1 marks this buffer as the final piece of input.
     * A zero result from XML_Parse means the parse failed; the line
     * number and error text could be reported via
     * XML_GetCurrentLineNumber / XML_ErrorString if ever wanted.
     */
    wellformed = (XML_Parse(parser, (char *) VARDATA(document), nbytes, 1) != 0);

    /* The parser is released on both outcomes. */
    XML_ParserFree(parser);

    PG_RETURN_BOOL(wellformed);
}
/* XPath handling functions */
/* XPath support here is for a very skeletal kind of XPath!
It was easy to program though... */
/* This first is the core function that builds a result set. The
actual functions called by the user manipulate that result set
in various ways.
*/
/*
 * build_xpath_results - parse `doc` with expat and collect the character
 * data found under elements whose path matches `pathstr`.
 *
 * doc      document text (varlena), parsed in one XML_Parse() call
 * pathstr  path expression (varlena); copied into a NUL-terminated string
 *          for the handlers
 *
 * Returns a palloc'd XPath_Results on success.  The caller owns it and is
 * expected to pfree both ->resbuf and the struct itself.  Returns NULL if
 * the document fails to parse; in that case every allocation made here
 * (including the result buffer) has already been released.
 *
 * NOTE(review): the result buffer is sized to the document's byte length
 * and pgxml_charhandler copies into it without a bounds check.  That
 * presumes expat never delivers more character data than the document
 * contains - confirm this holds when entities are expanded.
 */
static XPath_Results *build_xpath_results(text *doc, text *pathstr)
{
    XPath_Results *xpr;
    pgxml_udata *udata;
    XML_Parser p;
    char *res;
    int32 docsize;
    int32 pathlen;
    int parsed_ok;

    docsize = VARSIZE(doc) - VARHDRSZ;
    pathlen = VARSIZE(pathstr) - VARHDRSZ;

    /* Zeroed result set: rescount starts at 0. */
    xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
    memset((void *) xpr, 0, sizeof(XPath_Results));

    /* res isn't going to be the real return type, it is just a buffer */
    res = (char *) palloc(docsize);
    memset((void *) res, 0, docsize);
    xpr->resbuf = res;

    /* Per-parse state shared with the expat handlers. */
    udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
    memset((void *) udata, 0, sizeof(pgxml_udata));
    udata->currentpath[0] = '\0';
    udata->textgrab = 0;
    udata->path = (char *) palloc(pathlen + 1);
    memcpy(udata->path, VARDATA(pathstr), pathlen);
    udata->path[pathlen] = '\0';
    udata->resptr = res;
    udata->reslen = 0;
    udata->xpres = xpr;

    /* Now fire up the parser */
    pgxml_mhs_init();
    p = XML_ParserCreate_MM(NULL, &mhs, NULL);
    if (!p) {
        /*
         * Release our allocations before raising the error:
         * elog(ERROR) is not expected to return control here.
         */
        pfree(udata->path);
        pfree(udata);
        pfree(res);
        pfree(xpr);
        elog(ERROR, "pgxml: Could not create expat parser");
        return NULL;
    }
    XML_SetUserData(p, (void *) udata);

    /* Set the handlers */
    XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
    XML_SetCharacterDataHandler(p, pgxml_charhandler);

    /* The trailing 1 marks this buffer as the final piece of input. */
    parsed_ok = (XML_Parse(p, (char *) VARDATA(doc), docsize, 1) != 0);

    /* The parser and the handler state are no longer needed either way. */
    XML_ParserFree(p);
    pfree(udata->path);
    pfree(udata);

    if (!parsed_ok) {
        /* Not well-formed: nothing is returned, so free the results too. */
        pfree(res);
        pfree(xpr);
        return NULL;
    }
    return xpr;
}
/*
 * pgxml_xpath(document, pathstr, index) - SQL-callable path lookup.
 *
 * Builds the result set for `pathstr` over `document` and returns the
 * index'th result (counting from 1) as text.
 *
 * Returns SQL NULL when the document could not be parsed, or when the
 * index is below 1 or beyond the number of results found.
 */
PG_FUNCTION_INFO_V1(pgxml_xpath);
Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
    /* called as pgxml_xpath(document,pathstr, index) for the moment*/
    XPath_Results *xpresults;
    text *restext;
    text *t = PG_GETARG_TEXT_P(0); /*document buffer */
    text *t2 = PG_GETARG_TEXT_P(1);
    int32 position = PG_GETARG_INT32(2);    /* 1-based, as supplied by the caller */
    int32 ind;

    xpresults = build_xpath_results(t, t2);

    /* This needs to be changed depending on the mechanism for returning
       our set of results. */
    if (xpresults == NULL) /*parse error (not WF or parser failure) */
    {
        PG_RETURN_NULL();
    }

    /*
     * Reject positions outside 1..rescount before touching the arrays.
     * The lower bound is checked on the raw argument so that zero and
     * negative values never become a negative array index.
     */
    if (position < 1 || position > xpresults->rescount)
    {
        pfree(xpresults->resbuf);
        pfree(xpresults);
        PG_RETURN_NULL();
    }
    ind = position - 1;

    /* Copy the selected slice of the shared buffer into a fresh text value. */
    restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
    memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
    VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;

    pfree(xpresults->resbuf);
    pfree(xpresults);
    PG_RETURN_TEXT_P(restext);
}
/*
 * pgxml_pathcompare - decide whether text grabbing should start or stop
 * for the element path currently held in UD->currentpath.
 *
 * The wanted path (UD->path) is matched on whole path components:
 *   - a wanted path beginning with '/' must match from the very start of
 *     the current path;
 *   - any other wanted path must be immediately preceded by a '/'.
 * In both cases the match must be followed by '/' or the end of string.
 *
 * If the match ends exactly at the end of the current path, the current
 * element is a target and textgrab is switched on.  If there is a match
 * followed by '/', we are below a target and the grab state is left
 * alone.  If there is no such match and a grab is in progress, the grab
 * is switched off and the collected text is finalised.
 *
 * Every occurrence of the wanted path is examined, not just the first
 * one strstr() reports: with only the first occurrence, a relative path
 * whose text also appears earlier in the current path (for example "a"
 * under "/ra/a") would never be seen as anchored at the end.  Requiring
 * '/' or end-of-string after the match keeps a grab from lingering when
 * the parent path merely contains the wanted text inside a longer name.
 */
static void pgxml_pathcompare(void *userData)
{
    const char *cur = UD->currentpath;
    const char *want = UD->path;
    size_t wantlen = strlen(want);
    int absolute = (want[0] == '/');
    int inside = 0;         /* current element is a target or lies below one */
    int at_target = 0;      /* current element itself is a target */
    const char *hit;

    /* An empty wanted path can never name an element. */
    if (wantlen > 0) {
        for (hit = strstr(cur, want); hit != NULL; hit = strstr(hit + 1, want)) {
            char after = hit[wantlen];

            if (absolute) {
                /* Only an occurrence at the very start can qualify. */
                if (hit != cur)
                    break;
            } else if (hit == cur || hit[-1] != '/') {
                /* Not at a component boundary (never look before the buffer). */
                continue;
            }

            if (after == '\0') {
                inside = 1;
                at_target = 1;
            } else if (after == '/') {
                inside = 1;
            }
        }
    }

    if (!inside) {
        /* We have left the target element (or were never in one). */
        if (UD->textgrab) {
            UD->textgrab = 0;
            pgxml_finalisegrabbedtext(userData);
        }
        return;
    }

    if (at_target) {
        UD->textgrab = 1;
    }
}
/*
 * pgxml_starthandler - expat start-element callback.
 *
 * Appends "/<name>" to UD->currentpath when the result still fits within
 * MAXPATHLENGTH; otherwise emits a NOTICE and leaves the path unchanged.
 * When no text grab is in progress, the updated path is then compared
 * against the wanted path.  The attribute list is not used.
 */
static void pgxml_starthandler(void *userData, const XML_Char *name,
const XML_Char **atts)
{
    size_t namelen = strlen(name);
    size_t curlen = strlen(UD->currentpath);

    if ((namelen + curlen) <= MAXPATHLENGTH-2) {
        /* Room for the separator, the name and the terminator. */
        strcat(UD->currentpath, "/");
        strcat(UD->currentpath, name);
    } else {
        elog(NOTICE,"Path too long");
    }

    if (!UD->textgrab) {
        pgxml_pathcompare(userData);
    }
    /*
     * Otherwise we are already grabbing.  Depending on user preference,
     * should we "reconstitute" the element into the result text?
     */
}
/*
 * pgxml_endhandler - expat end-element callback.
 *
 * Removes the last component from UD->currentpath if it matches the
 * element being closed; a mismatch is reported with a NOTICE and the
 * path is left as it was.  While a text grab is in progress, the path
 * is then re-checked so the grab can end once the target is left.
 */
static void pgxml_endhandler(void *userData, const XML_Char *name)
{
    /* The last '/' marks where the current element's name begins. */
    char *lastsep = strrchr(UD->currentpath, '/');

    if (lastsep == NULL) {
        elog(ERROR,"There's a problem...");
        lastsep = UD->currentpath;
    }

    if (strcmp(name, lastsep + 1) == 0) {
        *lastsep = '\0'; /* Chop that element off the end */
    } else {
        /* unmatched entry, so only report it */
        elog(NOTICE,"Wanted [%s], got [%s]",lastsep,name);
    }

    if (UD->textgrab) {
        pgxml_pathcompare(userData);
    }
}
/*
 * pgxml_charhandler - expat character-data callback.
 *
 * While a text grab is in progress, appends the `len` bytes at `s` to
 * the result buffer and advances both the write pointer and the length
 * of the result being collected.  Does nothing otherwise.
 *
 * NOTE(review): the copy is not bounds-checked against the size of the
 * result buffer; the buffer size is not available here - confirm the
 * allocation in build_xpath_results is always large enough.
 */
static void pgxml_charhandler(void *userData, const XML_Char *s, int len)
{
    if (!UD->textgrab || len <= 0) {
        return;
    }

    memcpy(UD->resptr, s, len);
    UD->reslen += len;
    UD->resptr += len;
}
/* Should I be using PG list types here? */
static void pgxml_finalisegrabbedtext(void *userData)
{
/* In res/reslen, we have a single result. */
UD->xpres->results[UD->xpres->rescount]= UD->resptr - UD->reslen;
UD->xpres->reslens[UD->xpres->rescount]= UD->reslen;
UD->reslen=0;
UD->xpres->rescount++;
/* This effectively concatenates all the results together but we
do know where one ends and the next begins */
}