/********************************************************
 * Interface code to parse an XML document using expat
 ********************************************************/

#include "postgres.h"
#include "fmgr.h"

#include "expat.h"

#include "pgxml.h"

/* Memory management - we make expat use the standard pg memory manager */

XML_Memory_Handling_Suite mhs;

/*
 * Size in bytes of the result buffer allocated by the build_xpath_results()
 * call that is currently running.  pgxml_charhandler() consults it so that
 * grabbed text can never be written past the end of that buffer.  It is
 * reset on every call to build_xpath_results().
 */
static int32 pgxml_resbuf_capacity = 0;

/*
 * Passthrough functions (palloc is a macro, so expat cannot be handed its
 * address directly).
 */

/* malloc() replacement handed to expat. */
static void *
pgxml_palloc(size_t size)
{
	return palloc(size);
}

/*
 * realloc() replacement handed to expat.
 *
 * realloc(NULL, n) must behave like malloc(n); repalloc() does not accept a
 * NULL pointer, so that case is routed to palloc().
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);
	return repalloc(ptr, size);
}

/*
 * free() replacement handed to expat.
 *
 * free(NULL) must be a no-op; pfree() does not accept a NULL pointer.
 */
static void
pgxml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}

/* Point the global memory handling suite at the passthrough functions. */
static void
pgxml_mhs_init(void)
{
	mhs.malloc_fcn = pgxml_palloc;
	mhs.realloc_fcn = pgxml_repalloc;
	mhs.free_fcn = pgxml_pfree;
}

/*
 * Placeholder: this should set up the relevant handlers from user-supplied
 * settings.  Quite how these settings are made is another matter :)
 */
static void
pgxml_handler_init(void)
{
}

/*
 * pgxml_parse(document)
 *
 * Returns true if the document is well-formed, false otherwise.  Raises an
 * error if the expat parser cannot be created.
 */
PG_FUNCTION_INFO_V1(pgxml_parse);

Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	int32		docsize = VARSIZE(t) - VARHDRSZ;
	XML_Parser	p;
	bool		wellformed;

	pgxml_mhs_init();
	pgxml_handler_init();

	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
		elog(ERROR, "pgxml: Could not create expat parser");	/* does not return */

	/*
	 * The whole document is handed over in one final chunk.  Details of a
	 * failure are available through XML_GetErrorCode() and
	 * XML_GetCurrentLineNumber() should a NOTICE ever be wanted here.
	 */
	wellformed = (XML_Parse(p, (char *) VARDATA(t), docsize, 1) != 0);

	XML_ParserFree(p);

	PG_RETURN_BOOL(wellformed);
}

/* XPath handling functions */

/*
 * XPath support here is for a very skeletal kind of XPath!
 * It was easy to program though...
 */

/*
 * build_xpath_results
 *
 * Core function that builds a result set; the functions called by the user
 * manipulate that result set in various ways.
 *
 * doc is the XML document and pathstr the path to look for.  Returns a
 * palloc'd XPath_Results whose resbuf member holds all matched text
 * concatenated together (the caller pfree's both resbuf and the struct), or
 * NULL if the document could not be parsed.  Raises an error if the expat
 * parser cannot be created.
 */
static XPath_Results *
build_xpath_results(text *doc, text *pathstr)
{
	XPath_Results *xpr;
	pgxml_udata *udata;
	XML_Parser	p;
	char	   *res;
	int32		docsize = VARSIZE(doc) - VARHDRSZ;
	int32		pathsize = VARSIZE(pathstr) - VARHDRSZ;

	/* Zeroing the struct also leaves rescount at 0. */
	xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
	memset((void *) xpr, 0, sizeof(XPath_Results));

	/* res isn't going to be the real return type, it is just a buffer */
	res = (char *) palloc(docsize);
	memset((void *) res, 0, docsize);
	xpr->resbuf = res;
	pgxml_resbuf_capacity = docsize;

	/*
	 * Zeroing the user data also leaves currentpath empty and textgrab and
	 * reslen at 0.
	 */
	udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
	memset((void *) udata, 0, sizeof(pgxml_udata));

	/* NUL-terminated copy of the requested path */
	udata->path = (char *) palloc(pathsize + 1);
	memcpy(udata->path, VARDATA(pathstr), pathsize);
	udata->path[pathsize] = '\0';

	udata->resptr = res;
	udata->xpres = xpr;

	/* Now fire up the parser */
	pgxml_mhs_init();

	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
		elog(ERROR, "pgxml: Could not create expat parser");	/* does not return */

	XML_SetUserData(p, (void *) udata);

	/* Set the handlers */
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
	XML_SetCharacterDataHandler(p, pgxml_charhandler);

	if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
	{
		/* Not well-formed: release everything, including the result buffer. */
		XML_ParserFree(p);
		pfree(res);
		pfree(xpr);
		pfree(udata->path);
		pfree(udata);
		return NULL;
	}

	pfree(udata->path);
	pfree(udata);
	XML_ParserFree(p);

	return xpr;
}

/*
 * pgxml_xpath(document, pathstr, index)   -- for the moment
 *
 * Returns the text of the index'th (1-based) match of pathstr within
 * document.  Returns NULL if the document cannot be parsed or if index does
 * not designate an existing result.
 */
PG_FUNCTION_INFO_V1(pgxml_xpath);

Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
	XPath_Results *xpresults;
	text	   *restext;
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	text	   *t2 = PG_GETARG_TEXT_P(1);	/* path to look for */
	int32		ind = PG_GETARG_INT32(2) - 1;	/* zero-based result index */

	xpresults = build_xpath_results(t, t2);

	/*
	 * This needs to be changed depending on the mechanism for returning our
	 * set of results.
	 */

	if (xpresults == NULL)		/* parse error (document not well-formed) */
	{
		PG_RETURN_NULL();
	}

	/* Reject indexes below 1 as well as indexes past the last result. */
	if (ind < 0 || ind >= xpresults->rescount)
	{
		pfree(xpresults->resbuf);
		pfree(xpresults);
		PG_RETURN_NULL();
	}

	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;

	pfree(xpresults->resbuf);
	pfree(xpresults);

	PG_RETURN_TEXT_P(restext);
}

/*
 * pgxml_path_occurs
 *
 * Report whether "path" occurs in "current" lined up on element boundaries.
 * A path starting with '/' must sit at the very start of "current"; any
 * other path must be immediately preceded by a '/'.  With tail_only set the
 * occurrence must also reach the end of "current"; otherwise it may instead
 * be followed by a '/', i.e. by deeper elements.
 *
 * Every occurrence is examined, not just the first one, so that looking for
 * "a" in "/ab/a" succeeds.  An empty path never matches.
 */
static int
pgxml_path_occurs(const char *current, const char *path, int tail_only)
{
	size_t		pathlen = strlen(path);
	const char *pos;

	if (pathlen == 0)
		return 0;

	for (pos = strstr(current, path); pos != NULL; pos = strstr(pos + 1, path))
	{
		char		next = pos[pathlen];

		if (path[0] == '/')
		{
			/* Only the occurrence at the very start can qualify. */
			if (pos != current)
				return 0;
		}
		else if (pos == current || pos[-1] != '/')
			continue;

		if (next == '\0' || (next == '/' && !tail_only))
			return 1;
	}

	return 0;
}

/*
 * pgxml_pathcompare
 *
 * Compare the path of the element currently being parsed with the requested
 * path and update the text-grabbing state accordingly:
 *
 *	- while not grabbing, start grabbing when the current path ends with the
 *	  requested path;
 *	- while grabbing, carry on as long as the requested path is still part
 *	  of the current path (we are inside the matched element or one of its
 *	  descendants); otherwise stop and record the grabbed text as a result.
 */
static void
pgxml_pathcompare(void *userData)
{
	if (UD->textgrab)
	{
		if (!pgxml_path_occurs(UD->currentpath, UD->path, 0))
		{
			UD->textgrab = 0;
			pgxml_finalisegrabbedtext(userData);
		}
	}
	else if (pgxml_path_occurs(UD->currentpath, UD->path, 1))
		UD->textgrab = 1;
}

/*
 * expat start-element handler: append "/name" to the current path and,
 * unless text is already being grabbed, check whether the new path matches.
 */
static void
pgxml_starthandler(void *userData, const XML_Char *name,
				   const XML_Char **atts)
{
	/* Leave room for the separator and the terminating NUL. */
	if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
		elog(NOTICE, "Path too long");
	else
	{
		strcat(UD->currentpath, "/");
		strcat(UD->currentpath, name);
	}

	/*
	 * While grabbing we do nothing here.  Depending on user preference,
	 * should we "reconstitute" the element into the result text?
	 */
	if (!UD->textgrab)
		pgxml_pathcompare(userData);
}

/*
 * expat end-element handler: remove the element from the end of the current
 * path and, if text is being grabbed, check whether grabbing should stop.
 */
static void
pgxml_endhandler(void *userData, const XML_Char *name)
{
	char	   *sepptr;

	/* Start by removing the current element off the end of the currentpath */
	sepptr = strrchr(UD->currentpath, '/');
	if (sepptr == NULL)
		elog(ERROR, "There's a problem...");	/* does not return */

	if (strcmp(name, sepptr + 1) != 0)
	{
		/* unmatched entry, so do nothing */
		elog(NOTICE, "Wanted [%s], got [%s]", sepptr, name);
	}
	else
		sepptr[0] = '\0';		/* Chop that element off the end */

	if (UD->textgrab)
		pgxml_pathcompare(userData);
}

/*
 * expat character-data handler: while grabbing, append the text to the
 * result buffer.
 *
 * The buffer is only as large as the document, and expanded entities can
 * make the text longer than that, so the remaining space is checked before
 * copying.
 */
static void
pgxml_charhandler(void *userData, const XML_Char *s, int len)
{
	int32		used;

	if (!UD->textgrab || len <= 0)
		return;

	used = (int32) (UD->resptr - UD->xpres->resbuf);
	if (len > pgxml_resbuf_capacity - used)
		elog(ERROR, "pgxml: result text does not fit in result buffer");

	memcpy(UD->resptr, s, len);
	UD->resptr += len;
	UD->reslen += len;
}

/*
 * Record the text grabbed so far as one result.
 *
 * Should I be using PG list types here?
 */
static void
pgxml_finalisegrabbedtext(void *userData)
{
	XPath_Results *xpres = UD->xpres;

	/*
	 * NOTE(review): assumes results is an array embedded in XPath_Results
	 * (it is indexed without ever being allocated separately) -- confirm
	 * against pgxml.h.
	 */
	int			maxresults = (int) (sizeof(xpres->results) / sizeof(xpres->results[0]));

	if (xpres->rescount >= maxresults)
	{
		/* No slot left: drop this result rather than overrun the arrays. */
		elog(NOTICE, "pgxml: too many results, ignoring the rest");
		UD->reslen = 0;
		return;
	}

	/* In resptr/reslen, we have a single result. */
	xpres->results[xpres->rescount] = UD->resptr - UD->reslen;
	xpres->reslens[xpres->rescount] = UD->reslen;
	UD->reslen = 0;
	xpres->rescount++;

	/*
	 * This effectively concatenates all the results together but we do know
	 * where one ends and the next begins
	 */
}