Skip to content

Commit 6082b3d

Browse files
committed
Use xmlParseInNodeContext not xmlParseBalancedChunkMemory.
xmlParseInNodeContext has basically the same functionality with a different API: we have to supply an xmlNode that's attached to a document rather than just the document. That's not hard though. The benefits are two:

* Early 2.13.x releases of libxml2 contain a bug that causes xmlParseBalancedChunkMemory to return the wrong status value in some cases. This breaks our regression tests. While that bug is now fixed upstream and will probably never be seen in any production-oriented distro, it is currently a problem on some more-bleeding-edge-friendly platforms.

* xmlParseBalancedChunkMemory is considered to depend on libxml2's semi-deprecated SAX1 APIs, and will go away when and if they do. There may already be libxml2 builds out there that lack this function.

So there are both short- and long-term reasons to make this change.

While here, avoid allocating an xmlParserCtxt in DOCUMENT parse mode, since that code path is not going to use it.

Like 066e8ac, this will need to be back-patched. This is just a trial commit to see if the buildfarm agrees that we can use xmlParseInNodeContext unconditionally.

Erik Wienhold and Tom Lane, per report from Frank Streitzig.

Discussion: https://postgr.es/m/trinity-b0161630-d230-4598-9ebc-7a23acdb37cb-1720186432160@3c-app-gmx-bap25
Discussion: https://postgr.es/m/trinity-361ba18b-541a-4fe7-bc63-655ae3a7d599-1720259822452@3c-app-gmx-bs01
1 parent 1ff39f4 commit 6082b3d

File tree

1 file changed

+52
-23
lines changed
  • src/backend/utils/adt

1 file changed

+52
-23
lines changed

src/backend/utils/adt/xml.c

+52-23
Original file line numberDiff line numberDiff line change
@@ -1696,9 +1696,9 @@ xml_doctype_in_content(const xmlChar *str)
16961696
* XmlOptionType actually used to parse the input (typically the same as
16971697
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
16981698
*
1699-
* If parsed_nodes isn't NULL and the input is not an XML document, the list
1700-
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1701-
* to *parsed_nodes.
1699+
* If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1700+
* of parsed nodes from the xmlParseInNodeContext call will be returned
1701+
* to *parsed_nodes. (It is caller's responsibility to free that.)
17021702
*
17031703
* Errors normally result in ereport(ERROR), but if escontext is an
17041704
* ErrorSaveContext, then "safe" errors are reported there instead, and the
@@ -1750,6 +1750,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17501750
PG_TRY();
17511751
{
17521752
bool parse_as_document = false;
1753+
int options;
17531754
int res_code;
17541755
size_t count = 0;
17551756
xmlChar *version = NULL;
@@ -1758,11 +1759,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17581759
/* Any errors here are reported as hard ereport's */
17591760
xmlInitParser();
17601761

1761-
ctxt = xmlNewParserCtxt();
1762-
if (ctxt == NULL || xmlerrcxt->err_occurred)
1763-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1764-
"could not allocate parser context");
1765-
17661762
/* Decide whether to parse as document or content */
17671763
if (xmloption_arg == XMLOPTION_DOCUMENT)
17681764
parse_as_document = true;
@@ -1785,6 +1781,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17851781
parse_as_document = true;
17861782
}
17871783

1784+
/*
1785+
* Select parse options.
1786+
*
1787+
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1788+
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1789+
* internal DTD are applied'. As for external DTDs, we try to support
1790+
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1791+
* happen because xmlPgEntityLoader prevents it.
1792+
*/
1793+
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1794+
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1795+
17881796
/* initialize output parameters */
17891797
if (parsed_xmloptiontype != NULL)
17901798
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1794,18 +1802,16 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17941802

17951803
if (parse_as_document)
17961804
{
1797-
/*
1798-
* Note, that here we try to apply DTD defaults
1799-
* (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1800-
* 'Default values defined by internal DTD are applied'. As for
1801-
* external DTDs, we try to support them too, (see SQL/XML:2008 GR
1802-
* 10.16.7.e)
1803-
*/
1805+
ctxt = xmlNewParserCtxt();
1806+
if (ctxt == NULL || xmlerrcxt->err_occurred)
1807+
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1808+
"could not allocate parser context");
1809+
18041810
doc = xmlCtxtReadDoc(ctxt, utf8string,
1805-
NULL,
1811+
NULL, /* no URL */
18061812
"UTF-8",
1807-
XML_PARSE_NOENT | XML_PARSE_DTDATTR
1808-
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1813+
options);
1814+
18091815
if (doc == NULL || xmlerrcxt->err_occurred)
18101816
{
18111817
/* Use original option to decide which error code to report */
@@ -1822,6 +1828,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18221828
}
18231829
else
18241830
{
1831+
xmlNodePtr root;
1832+
1833+
/* set up document with empty root node to be the context node */
18251834
doc = xmlNewDoc(version);
18261835
if (doc == NULL || xmlerrcxt->err_occurred)
18271836
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1834,19 +1843,38 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18341843
"could not allocate XML document");
18351844
doc->standalone = standalone;
18361845

1846+
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1847+
if (root == NULL || xmlerrcxt->err_occurred)
1848+
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1849+
"could not allocate xml node");
1850+
/* This attaches root to doc, so we need not free it separately. */
1851+
xmlDocSetRootElement(doc, root);
1852+
18371853
/* allow empty content */
18381854
if (*(utf8string + count))
18391855
{
1840-
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1841-
utf8string + count,
1842-
parsed_nodes);
1843-
if (res_code != 0 || xmlerrcxt->err_occurred)
1856+
xmlNodePtr node_list = NULL;
1857+
xmlParserErrors res;
1858+
1859+
res = xmlParseInNodeContext(root,
1860+
(char *) utf8string + count,
1861+
strlen((char *) utf8string + count),
1862+
options,
1863+
&node_list);
1864+
1865+
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
18441866
{
1867+
xmlFreeNodeList(node_list);
18451868
xml_errsave(escontext, xmlerrcxt,
18461869
ERRCODE_INVALID_XML_CONTENT,
18471870
"invalid XML content");
18481871
goto fail;
18491872
}
1873+
1874+
if (parsed_nodes != NULL)
1875+
*parsed_nodes = node_list;
1876+
else
1877+
xmlFreeNodeList(node_list);
18501878
}
18511879
}
18521880

@@ -1866,7 +1894,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18661894
}
18671895
PG_END_TRY();
18681896

1869-
xmlFreeParserCtxt(ctxt);
1897+
if (ctxt != NULL)
1898+
xmlFreeParserCtxt(ctxt);
18701899

18711900
pg_xml_done(xmlerrcxt, false);
18721901

0 commit comments

Comments
 (0)