From 4e8b5cd94bd24e8162ef4fd45d0ce621481844da Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Tue, 23 Jan 2007 23:39:16 +0000 Subject: [PATCH] Simplify handling of XML error messages: Just use the string provided by libxml as the detail message. As per . For converting error codes to messages, we only need to cover those codes that we raise ourselves now. --- src/backend/utils/adt/xml.c | 279 +++++++----------------------- src/test/regress/expected/xml.out | 16 +- 2 files changed, 77 insertions(+), 218 deletions(-) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index fdf7f8e0a9..3b283d247f 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.20 2007/01/20 09:27:19 petere Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.21 2007/01/23 23:39:16 petere Exp $ * *------------------------------------------------------------------------- */ @@ -51,8 +51,6 @@ #ifdef USE_LIBXML -#define PG_XML_DEFAULT_URI "dummy.xml" - static StringInfo xml_err_buf = NULL; static void xml_init(void); @@ -63,7 +61,7 @@ static void xml_pfree(void *ptr); static char *xml_pstrdup(const char *string); #endif static void xml_ereport(int level, int sqlcode, - const char *msg, void *ctxt); + const char *msg); static void xml_errorHandler(void *ctxt, const char *msg, ...); static void xml_ereport_by_code(int level, int sqlcode, const char *msg, int errcode); @@ -667,14 +665,14 @@ xmlvalidate(PG_FUNCTION_ARGS) ctxt = xmlNewParserCtxt(); if (ctxt == NULL) xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, - "could not allocate parser context", ctxt); + "could not allocate parser context"); doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data), VARSIZE(data) - VARHDRSZ, - PG_XML_DEFAULT_URI, NULL, 0); + NULL, NULL, 0); if (doc == NULL) xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, - "could not parse XML data", ctxt); + "could not parse XML data"); #if 0 uri = xmlCreateURI(); @@ -683,21 +681,21 @@ xmlvalidate(PG_FUNCTION_ARGS) uri = xmlParseURI(dtdOrUri); if (uri == NULL) xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, - "not implemented yet... (TODO)", ctxt); + "not implemented yet... (TODO)"); else #endif dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri)); if (dtd == NULL) xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, - "could not load DTD", ctxt); + "could not load DTD"); if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1) result = true; if (!result) xml_ereport(NOTICE, ERRCODE_INVALID_XML_DOCUMENT, - "validation against DTD failed", ctxt); + "validation against DTD failed"); #if 0 if (uri) @@ -977,7 +975,6 @@ finished: * Convert a C string to XML internal representation * * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do not use SAX - see xml_reader.c) - * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below) */ static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding) @@ -1006,7 +1003,7 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod ctxt = xmlNewParserCtxt(); if (ctxt == NULL) xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, - "could not allocate parser context", ctxt); + "could not allocate parser context"); if (is_document) { @@ -1018,13 +1015,13 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod * SQL/XML:10.16.7.e) */ doc = xmlCtxtReadDoc(ctxt, utf8string, - PG_XML_DEFAULT_URI, + NULL, "UTF-8", - XML_PARSE_NOENT | XML_PARSE_DTDATTR - | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); + XML_PARSE_NOENT | XML_PARSE_DTDATTR + | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); if (doc == NULL) xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, - "invalid XML document", ctxt); + "invalid XML document"); } else { @@ -1036,12 +1033,14 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encod doc = xmlNewDoc(NULL); res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone); - - if (res_code == 0) - res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL); if (res_code != 0) xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT, - "invalid XML content", res_code); + "invalid XML content: invalid XML declaration", res_code); + + res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL); + if (res_code != 0) + xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT, + "invalid XML content"); doc->version = xmlStrdup(version); doc->encoding = xmlStrdup((xmlChar *) "UTF-8"); @@ -1120,50 +1119,46 @@ xml_pstrdup(const char *string) /* - * Wrapper for "ereport" function. - * Adds detail - libxml's native error message, if any. + * Wrapper for "ereport" function for XML-related errors. The "msg" + * is the SQL-level message; some can be adopted from the SQL/XML + * standard. This function adds libxml's native error messages, if + * any, as detail. */ static void xml_ereport(int level, int sqlcode, - const char *msg, void *ctxt) + const char *msg) { - xmlErrorPtr libxmlErr = NULL; + char *detail; if (xml_err_buf->len > 0) { - ereport(DEBUG1, - (errmsg("%s", xml_err_buf->data))); + detail = pstrdup(xml_err_buf->data); xml_err_buf->data[0] = '\0'; xml_err_buf->len = 0; } + else + detail = NULL; - if (ctxt != NULL) - libxmlErr = xmlCtxtGetLastError(ctxt); + /* libxml error messages end in '\n'; get rid of it */ + if (detail) + { + size_t len; - if (libxmlErr == NULL) + len = strlen(detail); + if (len > 0 && detail[len-1] == '\n') + detail[len-1] = '\0'; + + ereport(level, + (errcode(sqlcode), + errmsg("%s", msg), + errdetail("%s", detail))); + } + else { ereport(level, (errcode(sqlcode), errmsg("%s", msg))); } - else - { - /* as usual, libxml error message contains '\n'; get rid of it */ - char *xmlErrDetail; - int xmlErrLen, i; - - xmlErrDetail = pstrdup(libxmlErr->message); - xmlErrLen = strlen(xmlErrDetail); - for (i = 0; i < xmlErrLen; i++) - { - if (xmlErrDetail[i] == '\n') - xmlErrDetail[i] = '.'; - } - ereport(level, - (errcode(sqlcode), - errmsg("%s", msg), - errdetail("%s", xmlErrDetail))); - } } @@ -1194,8 +1189,11 @@ xml_errorHandler(void *ctxt, const char *msg,...) /* - * Return error message by libxml error code - * TODO make them closer to recommendations from Postgres manual + * Wrapper for "ereport" function for XML-related errors. The "msg" + * is the SQL-level message; some can be adopted from the SQL/XML + * standard. This function uses "code" to create a textual detail + * message. At the moment, we only need to cover those codes that we + * may raise in this file. */ static void xml_ereport_by_code(int level, int sqlcode, @@ -1203,173 +1201,26 @@ xml_ereport_by_code(int level, int sqlcode, { const char *det; - if (xml_err_buf->len > 0) - { - ereport(DEBUG1, - (errmsg("%s", xml_err_buf->data))); - xml_err_buf->data[0] = '\0'; - xml_err_buf->len = 0; - } - switch (code) { - case XML_ERR_INTERNAL_ERROR: - det = "libxml internal error"; - break; - case XML_ERR_ENTITY_LOOP: - det = "Detected an entity reference loop"; - break; - case XML_ERR_ENTITY_NOT_STARTED: - det = "EntityValue: \" or ' expected"; - break; - case XML_ERR_ENTITY_NOT_FINISHED: - det = "EntityValue: \" or ' expected"; - break; - case XML_ERR_ATTRIBUTE_NOT_STARTED: - det = "AttValue: \" or ' expected"; - break; - case XML_ERR_LT_IN_ATTRIBUTE: - det = "Unescaped '<' not allowed in attributes values"; - break; - case XML_ERR_LITERAL_NOT_STARTED: - det = "SystemLiteral \" or ' expected"; - break; - case XML_ERR_LITERAL_NOT_FINISHED: - det = "Unfinished System or Public ID \" or ' expected"; - break; - case XML_ERR_MISPLACED_CDATA_END: - det = "Sequence ']]>' not allowed in content"; - break; - case XML_ERR_URI_REQUIRED: - det = "SYSTEM or PUBLIC, the URI is missing"; - break; - case XML_ERR_PUBID_REQUIRED: - det = "PUBLIC, the Public Identifier is missing"; - break; - case XML_ERR_HYPHEN_IN_COMMENT: - det = "Comment must not contain '--' (double-hyphen)"; - break; - case XML_ERR_PI_NOT_STARTED: - det = "xmlParsePI : no target name"; - break; - case XML_ERR_RESERVED_XML_NAME: - det = "Invalid PI name"; - break; - case XML_ERR_NOTATION_NOT_STARTED: - det = "NOTATION: Name expected here"; - break; - case XML_ERR_NOTATION_NOT_FINISHED: - det = "'>' required to close NOTATION declaration"; - break; - case XML_ERR_VALUE_REQUIRED: - det = "Entity value required"; - break; - case XML_ERR_URI_FRAGMENT: - det = "Fragment not allowed"; - break; - case XML_ERR_ATTLIST_NOT_STARTED: - det = "'(' required to start ATTLIST enumeration"; - break; - case XML_ERR_NMTOKEN_REQUIRED: - det = "NmToken expected in ATTLIST enumeration"; - break; - case XML_ERR_ATTLIST_NOT_FINISHED: - det = "')' required to finish ATTLIST enumeration"; - break; - case XML_ERR_MIXED_NOT_STARTED: - det = "MixedContentDecl : '|' or ')*' expected"; - break; - case XML_ERR_PCDATA_REQUIRED: - det = "MixedContentDecl : '#PCDATA' expected"; - break; - case XML_ERR_ELEMCONTENT_NOT_STARTED: - det = "ContentDecl : Name or '(' expected"; - break; - case XML_ERR_ELEMCONTENT_NOT_FINISHED: - det = "ContentDecl : ',' '|' or ')' expected"; - break; - case XML_ERR_PEREF_IN_INT_SUBSET: - det = "PEReference: forbidden within markup decl in internal subset"; - break; - case XML_ERR_GT_REQUIRED: - det = "Expected '>'"; - break; - case XML_ERR_CONDSEC_INVALID: - det = "XML conditional section '[' expected"; - break; - case XML_ERR_EXT_SUBSET_NOT_FINISHED: - det = "Content error in the external subset"; - break; - case XML_ERR_CONDSEC_INVALID_KEYWORD: - det = "conditional section INCLUDE or IGNORE keyword expected"; - break; - case XML_ERR_CONDSEC_NOT_FINISHED: - det = "XML conditional section not closed"; - break; - case XML_ERR_XMLDECL_NOT_STARTED: - det = "Text declaration '' expected"; - break; - case XML_ERR_EXT_ENTITY_STANDALONE: - det = "external parsed entities cannot be standalone"; - break; - case XML_ERR_ENTITYREF_SEMICOL_MISSING: - det = "EntityRef: expecting ';'"; - break; - case XML_ERR_DOCTYPE_NOT_FINISHED: - det = "DOCTYPE improperly terminated"; - break; - case XML_ERR_LTSLASH_REQUIRED: - det = "EndTag: '' expected"; + break; default: det = "Unrecognized libxml error code: %d"; break; diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index 8c76dcee0c..d91d765303 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -6,7 +6,9 @@ INSERT INTO xmltest VALUES (1, 'one'); INSERT INTO xmltest VALUES (2, 'two'); INSERT INTO xmltest VALUES (3, '' +DETAIL: Entity: line 1: parser error : Couldn't find end of Start Tag wrong line 1 +' +DETAIL: Entity: line 1: parser error : Couldn't find end of Start Tag syntax line 1 +', NULL, ''); xmlconcat ----------------------------------- @@ -155,7 +159,9 @@ SELECT xmlparse(content 'x'); SELECT xmlparse(document 'abc'); ERROR: invalid XML document -DETAIL: Start tag expected, '<' not found. +DETAIL: Entity: line 1: parser error : Start tag expected, '<' not found +abc +^ SELECT xmlparse(document 'x'); xmlparse -------------- @@ -273,7 +279,9 @@ SELECT xml 'abc' IS NOT DOCUMENT; SELECT '<>' IS NOT DOCUMENT; ERROR: invalid XML content -DETAIL: Element name not found +DETAIL: Entity: line 1: parser error : StartTag: invalid element name +<> + ^ SELECT xmlagg(data) FROM xmltest; xmlagg --------------------------------------