Skip to content

Commit 71c0921

Browse files
tglsfdc and ewie committed
Avoid regression in the size of XML input that we will accept.
This mostly reverts commit 6082b3d, "Use xmlParseInNodeContext not xmlParseBalancedChunkMemory".

It turns out that xmlParseInNodeContext will reject text chunks exceeding 10MB, while (in most libxml2 versions) xmlParseBalancedChunkMemory will not. The bleeding-edge libxml2 bug that we needed to work around a year ago is presumably no longer a factor, and the argument that xmlParseBalancedChunkMemory is semi-deprecated is not enough to justify a functionality regression. Hence, go back to doing it the old way.

Reported-by: Michael Paquier <michael@paquier.xyz>
Author: Michael Paquier <michael@paquier.xyz>
Co-authored-by: Erik Wienhold <ewie@ewie.name>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/aIGknLuc8b8ega2X@paquier.xyz
Backpatch-through: 13
1 parent d5b9b2d commit 71c0921

File tree

1 file changed

+30
-38
lines changed
  • src/backend/utils/adt

1 file changed

+30
-38
lines changed

src/backend/utils/adt/xml.c

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,7 +1769,7 @@ xml_doctype_in_content(const xmlChar *str)
17691769
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
17701770
*
17711771
* If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1772-
* of parsed nodes from the xmlParseInNodeContext call will be returned
1772+
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
17731773
* to *parsed_nodes. (It is caller's responsibility to free that.)
17741774
*
17751775
* Errors normally result in ereport(ERROR), but if escontext is an
@@ -1795,6 +1795,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
17951795
PgXmlErrorContext *xmlerrcxt;
17961796
volatile xmlParserCtxtPtr ctxt = NULL;
17971797
volatile xmlDocPtr doc = NULL;
1798+
volatile int save_keep_blanks = -1;
17981799

17991800
/*
18001801
* This step looks annoyingly redundant, but we must do it to have a
@@ -1822,7 +1823,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18221823
PG_TRY();
18231824
{
18241825
bool parse_as_document = false;
1825-
int options;
18261826
int res_code;
18271827
size_t count = 0;
18281828
xmlChar *version = NULL;
@@ -1853,18 +1853,6 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18531853
parse_as_document = true;
18541854
}
18551855

1856-
/*
1857-
* Select parse options.
1858-
*
1859-
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1860-
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1861-
* internal DTD are applied'. As for external DTDs, we try to support
1862-
* them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1863-
* happen because xmlPgEntityLoader prevents it.
1864-
*/
1865-
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1866-
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1867-
18681856
/* initialize output parameters */
18691857
if (parsed_xmloptiontype != NULL)
18701858
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
@@ -1874,11 +1862,26 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
18741862

18751863
if (parse_as_document)
18761864
{
1865+
int options;
1866+
1867+
/* set up parser context used by xmlCtxtReadDoc */
18771868
ctxt = xmlNewParserCtxt();
18781869
if (ctxt == NULL || xmlerrcxt->err_occurred)
18791870
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
18801871
"could not allocate parser context");
18811872

1873+
/*
1874+
* Select parse options.
1875+
*
1876+
* Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1877+
* according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1878+
* by internal DTD are applied'. As for external DTDs, we try to
1879+
* support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1880+
* doesn't really happen because xmlPgEntityLoader prevents it.
1881+
*/
1882+
options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1883+
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1884+
18821885
doc = xmlCtxtReadDoc(ctxt, utf8string,
18831886
NULL, /* no URL */
18841887
"UTF-8",
@@ -1900,10 +1903,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19001903
}
19011904
else
19021905
{
1903-
xmlNodePtr root;
1904-
xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY;
1905-
1906-
/* set up document with empty root node to be the context node */
1906+
/* set up document that xmlParseBalancedChunkMemory will add to */
19071907
doc = xmlNewDoc(version);
19081908
if (doc == NULL || xmlerrcxt->err_occurred)
19091909
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
@@ -1916,36 +1916,23 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19161916
"could not allocate XML document");
19171917
doc->standalone = standalone;
19181918

1919-
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1920-
if (root == NULL || xmlerrcxt->err_occurred)
1921-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1922-
"could not allocate xml node");
1923-
1924-
/*
1925-
* This attaches root to doc, so we need not free it separately;
1926-
* and there can't yet be any old root to free.
1927-
*/
1928-
oldroot = xmlDocSetRootElement(doc, root);
1929-
Assert(oldroot == NULL);
1919+
/* set parse options --- have to do this the ugly way */
1920+
save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
19301921

19311922
/* allow empty content */
19321923
if (*(utf8string + count))
19331924
{
19341925
xmlNodePtr node_list = NULL;
1935-
xmlParserErrors res;
1936-
1937-
res = xmlParseInNodeContext(root,
1938-
(char *) utf8string + count,
1939-
strlen((char *) utf8string + count),
1940-
options,
1941-
&node_list);
19421926

1943-
if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1927+
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1928+
utf8string + count,
1929+
&node_list);
1930+
if (res_code != 0 || xmlerrcxt->err_occurred)
19441931
{
1945-
xmlFreeNodeList(node_list);
19461932
xml_errsave(escontext, xmlerrcxt,
19471933
ERRCODE_INVALID_XML_CONTENT,
19481934
"invalid XML content");
1935+
xmlFreeNodeList(node_list);
19491936
goto fail;
19501937
}
19511938

@@ -1961,6 +1948,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19611948
}
19621949
PG_CATCH();
19631950
{
1951+
if (save_keep_blanks != -1)
1952+
xmlKeepBlanksDefault(save_keep_blanks);
19641953
if (doc != NULL)
19651954
xmlFreeDoc(doc);
19661955
if (ctxt != NULL)
@@ -1972,6 +1961,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
19721961
}
19731962
PG_END_TRY();
19741963

1964+
if (save_keep_blanks != -1)
1965+
xmlKeepBlanksDefault(save_keep_blanks);
1966+
19751967
if (ctxt != NULL)
19761968
xmlFreeParserCtxt(ctxt);
19771969

0 commit comments

Comments
 (0)