/********************************************************
 * Interface code to parse an XML document using expat
 ********************************************************/

#include "postgres.h"
#include "fmgr.h"

#include "expat.h"

#include "pgxml.h"

/* Memory management - we make expat use standard pg MM */

XML_Memory_Handling_Suite mhs;

/*
 * Size in bytes of the result buffer handed to the current XPath parse.
 * Kept at file level because the expat handlers only receive the
 * pgxml_udata pointer, and no capacity member of that struct is used
 * anywhere in this file.  Set by build_xpath_results() before parsing.
 */
static int32 pgxml_resbuf_size = 0;

/* How the requested path relates to the path of the current element. */
enum
{
	PGXML_PATH_NONE = 0,		/* no component-aligned occurrence */
	PGXML_PATH_INSIDE,			/* path selects an ancestor of the element */
	PGXML_PATH_EXACT			/* path selects the current element */
};

/* passthrough functions (palloc is a macro) */

static void *
pgxml_palloc(size_t size)
{
	return palloc(size);
}

static void *
pgxml_repalloc(void *ptr, size_t size)
{
	return repalloc(ptr, size);
}

static void
pgxml_pfree(void *ptr)
{
	/* A void function must not return an expression in ISO C. */
	pfree(ptr);
}

/* Point expat's allocator hooks at the palloc-based wrappers above. */
static void
pgxml_mhs_init(void)
{
	mhs.malloc_fcn = pgxml_palloc;
	mhs.realloc_fcn = pgxml_repalloc;
	mhs.free_fcn = pgxml_pfree;
}

static void
pgxml_handler_init(void)
{
	/*
	 * This code should set up the relevant handlers from user-supplied
	 * settings. Quite how these settings are made is another matter :)
	 */
}

/* Returns true if document is well-formed */

PG_FUNCTION_INFO_V1(pgxml_parse);

Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
	/* called as pgxml_parse(document) */
	XML_Parser	p;
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	int32		docsize = VARSIZE(t) - VARHDRSZ;
	bool		wellformed;

	pgxml_mhs_init();
	pgxml_handler_init();

	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
		elog(ERROR, "pgxml: Could not create expat parser");	/* does not return */

	/*
	 * A failed parse is reported as "false" rather than as an error; use
	 * XML_GetCurrentLineNumber()/XML_ErrorString() here if a diagnostic is
	 * ever wanted.
	 */
	wellformed = XML_Parse(p, (char *) VARDATA(t), docsize, 1) ? true : false;

	XML_ParserFree(p);
	PG_RETURN_BOOL(wellformed);
}

/* XPath handling functions */

/* XPath support here is for a very skeletal kind of XPath!
   It was easy to program though... */

/*
 * This first is the core function that builds a result set. The
 * actual functions called by the user manipulate that result set
 * in various ways.
 *
 * doc is the XML document, pathstr the (skeletal) path to look for.
 * Returns a palloc'd XPath_Results whose resbuf holds the concatenated
 * text of every match, or NULL if the document failed to parse.  The
 * caller owns the result and frees both resbuf and the struct.
 */
static XPath_Results *
build_xpath_results(text *doc, text *pathstr)
{
	XPath_Results *xpr;
	char	   *res;
	pgxml_udata *udata;
	XML_Parser	p;
	int32		docsize = VARSIZE(doc) - VARHDRSZ;
	int32		pathlen = VARSIZE(pathstr) - VARHDRSZ;
	int			parsed;

	xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
	memset((void *) xpr, 0, sizeof(XPath_Results));
	xpr->rescount = 0;

	/* res isn't going to be the real return type, it is just a buffer */
	res = (char *) palloc(docsize);
	memset((void *) res, 0, docsize);
	xpr->resbuf = res;
	pgxml_resbuf_size = docsize;	/* checked by pgxml_charhandler */

	udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
	memset((void *) udata, 0, sizeof(pgxml_udata));
	udata->currentpath[0] = '\0';
	udata->textgrab = 0;

	/* NUL-terminated private copy of the path argument */
	udata->path = (char *) palloc(pathlen + 1);
	memcpy(udata->path, VARDATA(pathstr), pathlen);
	udata->path[pathlen] = '\0';

	udata->resptr = res;
	udata->reslen = 0;
	udata->xpres = xpr;

	/* Now fire up the parser */
	pgxml_mhs_init();

	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
		elog(ERROR, "pgxml: Could not create expat parser");	/* does not return */

	XML_SetUserData(p, (void *) udata);

	/* Set the handlers */
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
	XML_SetCharacterDataHandler(p, pgxml_charhandler);

	parsed = XML_Parse(p, (char *) VARDATA(doc), docsize, 1) ? 1 : 0;

	/* Parser and per-parse state are no longer needed on either outcome. */
	XML_ParserFree(p);
	pfree(udata->path);
	pfree(udata);

	if (!parsed)
	{
		/* Not well-formed: release the result set, including its buffer. */
		pfree(res);
		pfree(xpr);
		return NULL;
	}

	return xpr;
}


PG_FUNCTION_INFO_V1(pgxml_xpath);

Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
	/* called as pgxml_xpath(document,pathstr, index) for the moment */
	XPath_Results *xpresults;
	text	   *restext;
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	text	   *t2 = PG_GETARG_TEXT_P(1);
	int32		pos = PG_GETARG_INT32(2);	/* 1-based result number */
	int32		ind;

	xpresults = build_xpath_results(t, t2);

	/*
	 * This needs to be changed depending on the mechanism for returning
	 * our set of results.
	 */

	if (xpresults == NULL)		/* parse error (not WF or parser failure) */
		PG_RETURN_NULL();

	/*
	 * Reject positions below 1 as well as those past the last result; a
	 * non-positive position would otherwise index before results[0].
	 */
	if (pos < 1 || pos > xpresults->rescount)
	{
		pfree(xpresults->resbuf);
		pfree(xpresults);
		PG_RETURN_NULL();
	}
	ind = pos - 1;

	restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
	memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);

	VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;

	pfree(xpresults->resbuf);
	pfree(xpresults);

	PG_RETURN_TEXT_P(restext);
}

/*
 * Work out how "path" relates to "current" (the '/'-joined names of the
 * open elements).  Every occurrence of path is examined, not just the
 * first, and an occurrence only counts when it lines up with element
 * boundaries:
 *   - a path starting with '/' must sit at the very start of current;
 *   - any other path must be immediately preceded by a '/';
 *   - the occurrence must be followed by the end of current (the path
 *     selects the current element) or by a '/' (it selects an ancestor).
 * An empty path never matches.
 */
static int
pgxml_path_relation(const char *current, const char *path)
{
	size_t		pathlen = strlen(path);
	int			relation = PGXML_PATH_NONE;
	const char *hit;

	if (pathlen == 0)
		return PGXML_PATH_NONE;

	/* hit + 1 stays inside the string because a match is never empty */
	for (hit = strstr(current, path); hit != NULL; hit = strstr(hit + 1, path))
	{
		if (path[0] == '/')
		{
			/* anchored path: later occurrences can never qualify */
			if (hit != current)
				break;
		}
		else if (hit == current || hit[-1] != '/')
			continue;			/* starts in the middle of an element name */

		if (hit[pathlen] == '\0')
			return PGXML_PATH_EXACT;
		if (hit[pathlen] == '/')
			relation = PGXML_PATH_INSIDE;
	}

	return relation;
}

/*
 * Compare the current element path against the requested path and update
 * the text-grabbing state: start grabbing when the path selects the
 * current element, keep the state while still below a selected element,
 * and close off the pending result once the selected element is left.
 */
static void
pgxml_pathcompare(void *userData)
{
	switch (pgxml_path_relation(UD->currentpath, UD->path))
	{
		case PGXML_PATH_EXACT:
			UD->textgrab = 1;
			break;

		case PGXML_PATH_INSIDE:
			/* still underneath a selected element: leave state alone */
			break;

		default:
			/* Should we have more logic here ? */
			if (UD->textgrab)
			{
				UD->textgrab = 0;
				pgxml_finalisegrabbedtext(userData);
			}
			break;
	}
}

/*
 * expat start-tag callback: append "/name" to the current path and, when
 * not already grabbing text, test the new path against the request.
 */
static void
pgxml_starthandler(void *userData, const XML_Char * name,
				   const XML_Char ** atts)
{
	size_t		curlen = strlen(UD->currentpath);
	size_t		namelen = strlen(name);

	/*
	 * Room is needed for the separator and the terminating NUL on top of
	 * both strings (assumes currentpath holds MAXPATHLENGTH bytes, as the
	 * original bound implies).
	 */
	if ((namelen + curlen) > MAXPATHLENGTH - 2)
		elog(WARNING, "Path too long");
	else
	{
		UD->currentpath[curlen] = '/';
		memcpy(UD->currentpath + curlen + 1, name, namelen + 1);
	}

	if (UD->textgrab)
	{
		/*
		 * Depending on user preference, should we "reconstitute" the
		 * element into the result text?
		 */
	}
	else
		pgxml_pathcompare(userData);
}

/*
 * expat end-tag callback: drop the closed element from the current path
 * and, if text was being grabbed, re-check whether grabbing should stop.
 */
static void
pgxml_endhandler(void *userData, const XML_Char * name)
{
	/*
	 * Start by removing the current element off the end of the
	 * currentpath
	 */
	char	   *sepptr;

	sepptr = strrchr(UD->currentpath, '/');
	if (sepptr == NULL)
		elog(ERROR, "There's a problem...");	/* does not return */

	if (strcmp(name, sepptr + 1) != 0)
	{
		elog(WARNING, "Wanted [%s], got [%s]", sepptr, name);
		/* unmatched entry, so do nothing */
	}
	else
	{
		sepptr[0] = '\0';		/* Chop that element off the end */
	}

	if (UD->textgrab)
		pgxml_pathcompare(userData);
}

/*
 * expat character-data callback: while grabbing, append the text to the
 * result buffer.
 */
static void
pgxml_charhandler(void *userData, const XML_Char * s, int len)
{
	int32		used;

	if (!UD->textgrab || len <= 0)
		return;

	/*
	 * The buffer is only as large as the input document, but the text
	 * expat hands over can be longer than the bytes it was parsed from
	 * (entity expansion, recoding to UTF-8), so the write must be bounded.
	 */
	used = (int32) (UD->resptr - (char *) UD->xpres->resbuf);
	if (len > pgxml_resbuf_size - used)
		elog(ERROR, "pgxml: XPath result text exceeds result buffer");

	memcpy(UD->resptr, s, len);
	UD->resptr += len;
	UD->reslen += len;
}

/* Should I be using PG list types here? */

/*
 * Record the text grabbed since the previous result as one more entry in
 * the result set and reset the running length.
 */
static void
pgxml_finalisegrabbedtext(void *userData)
{
	XPath_Results *xpr = UD->xpres;

	/*
	 * results[] and reslens[] are indexed straight out of the zeroed
	 * struct with no separate allocation, so they are taken to be
	 * fixed-size array members (confirm against pgxml.h); never write
	 * past either of them.
	 */
	if ((size_t) xpr->rescount >= sizeof(xpr->results) / sizeof(xpr->results[0]) ||
		(size_t) xpr->rescount >= sizeof(xpr->reslens) / sizeof(xpr->reslens[0]))
		elog(ERROR, "pgxml: too many XPath results");

	/* In res/reslen, we have a single result. */
	xpr->results[xpr->rescount] = UD->resptr - UD->reslen;
	xpr->reslens[xpr->rescount] = UD->reslen;
	UD->reslen = 0;
	xpr->rescount++;

	/*
	 * This effectively concatenates all the results together but we do
	 * know where one ends and the next begins
	 */
}