Skip to content

Commit 37bef84

Browse files
committed
Convert xml_in to report errors softly.
The key idea here is that xml_parse must distinguish hard errors from soft errors. We want to throw a hard error for libxml initialization failures: those might be out-of-memory, or something else, but in any case they are not the fault of the input string. If we get to the point of parsing the input, and something goes wrong, we can fairly consider that to mean bad input.

One thing that arguably does mean bad input, but I didn't trouble to handle softly, is encoding conversion failure while converting the server encoding to UTF8. This might be something to improve later, but it seems like a pretty low-probability scenario.

Discussion: https://postgr.es/m/3564577.1671142683@sss.pgh.pa.us
1 parent e52f8b3 commit 37bef84

File tree

5 files changed

+205
-35
lines changed

5 files changed

+205
-35
lines changed

src/backend/utils/adt/xml.c

Lines changed: 120 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,10 @@ struct PgXmlErrorContext
119119

120120
static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121121
xmlParserCtxtPtr ctxt);
122+
static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
123+
int sqlcode, const char *msg);
122124
static void xml_errorHandler(void *data, xmlErrorPtr error);
123-
static void xml_ereport_by_code(int level, int sqlcode,
124-
const char *msg, int code);
125+
static int errdetail_for_xml_code(int code);
125126
static void chopStringInfoNewlines(StringInfo str);
126127
static void appendStringInfoLineSeparator(StringInfo str);
127128

@@ -143,7 +144,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143144
pg_enc encoding, int standalone);
144145
static bool xml_doctype_in_content(const xmlChar *str);
145146
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146-
bool preserve_whitespace, int encoding);
147+
bool preserve_whitespace, int encoding,
148+
Node *escontext);
147149
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148150
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149151
ArrayBuildState *astate,
@@ -261,14 +263,18 @@ xml_in(PG_FUNCTION_ARGS)
261263
xmltype *vardata;
262264
xmlDocPtr doc;
263265

266+
/* Build the result object. */
264267
vardata = (xmltype *) cstring_to_text(s);
265268

266269
/*
267-
* Parse the data to check if it is well-formed XML data. Assume that
268-
* ERROR occurred if parsing failed.
270+
* Parse the data to check if it is well-formed XML data.
271+
*
272+
* Note: we don't need to worry about whether a soft error is detected.
269273
*/
270-
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
271-
xmlFreeDoc(doc);
274+
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
275+
fcinfo->context);
276+
if (doc != NULL)
277+
xmlFreeDoc(doc);
272278

273279
PG_RETURN_XML_P(vardata);
274280
#else
@@ -323,9 +329,10 @@ xml_out_internal(xmltype *x, pg_enc target_encoding)
323329
return buf.data;
324330
}
325331

326-
xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
327-
"could not parse XML declaration in stored value",
328-
res_code);
332+
ereport(WARNING,
333+
errcode(ERRCODE_INTERNAL_ERROR),
334+
errmsg_internal("could not parse XML declaration in stored value"),
335+
errdetail_for_xml_code(res_code));
329336
#endif
330337
return str;
331338
}
@@ -392,7 +399,7 @@ xml_recv(PG_FUNCTION_ARGS)
392399
* Parse the data to check if it is well-formed XML data. Assume that
393400
* xml_parse will throw ERROR if not.
394401
*/
395-
doc = xml_parse(result, xmloption, true, encoding);
402+
doc = xml_parse(result, xmloption, true, encoding, NULL);
396403
xmlFreeDoc(doc);
397404

398405
/* Now that we know what we're dealing with, convert to server encoding */
@@ -754,7 +761,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
754761
xmlDocPtr doc;
755762

756763
doc = xml_parse(data, xmloption_arg, preserve_whitespace,
757-
GetDatabaseEncoding());
764+
GetDatabaseEncoding(), NULL);
758765
xmlFreeDoc(doc);
759766

760767
return (xmltype *) data;
@@ -895,7 +902,7 @@ xml_is_document(xmltype *arg)
895902
PG_TRY();
896903
{
897904
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
898-
GetDatabaseEncoding());
905+
GetDatabaseEncoding(), NULL);
899906
result = true;
900907
}
901908
PG_CATCH();
@@ -1500,17 +1507,26 @@ xml_doctype_in_content(const xmlChar *str)
15001507

15011508

15021509
/*
1503-
* Convert a C string to XML internal representation
1510+
* Convert a text object to XML internal representation
1511+
*
1512+
* data is the source data (must not be toasted!), encoding is its encoding,
1513+
* and xmloption_arg and preserve_whitespace are options for the
1514+
* transformation.
1515+
*
1516+
* Errors normally result in ereport(ERROR), but if escontext is an
1517+
* ErrorSaveContext, then "safe" errors are reported there instead, and the
1518+
* caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
15041519
*
15051520
* Note: it is caller's responsibility to xmlFreeDoc() the result,
1506-
* else a permanent memory leak will ensue!
1521+
* else a permanent memory leak will ensue! But note the result could
1522+
* be NULL after a soft error.
15071523
*
15081524
* TODO maybe libxml2's xmlreader is better? (do not construct DOM,
15091525
* yet do not use SAX - see xmlreader.c)
15101526
*/
15111527
static xmlDocPtr
15121528
xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1513-
int encoding)
1529+
int encoding, Node *escontext)
15141530
{
15151531
int32 len;
15161532
xmlChar *string;
@@ -1519,9 +1535,20 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15191535
volatile xmlParserCtxtPtr ctxt = NULL;
15201536
volatile xmlDocPtr doc = NULL;
15211537

1538+
/*
1539+
* This step looks annoyingly redundant, but we must do it to have a
1540+
* null-terminated string in case encoding conversion isn't required.
1541+
*/
15221542
len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
15231543
string = xml_text2xmlChar(data);
15241544

1545+
/*
1546+
* If the data isn't UTF8, we must translate before giving it to libxml.
1547+
*
1548+
* XXX ideally, we'd catch any encoding conversion failure and return a
1549+
* soft error. However, failure to convert to UTF8 should be pretty darn
1550+
* rare, so for now this is left undone.
1551+
*/
15251552
utf8string = pg_do_encoding_conversion(string,
15261553
len,
15271554
encoding,
@@ -1539,6 +1566,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15391566
xmlChar *version = NULL;
15401567
int standalone = 0;
15411568

1569+
/* Any errors here are reported as hard ereport's */
15421570
xmlInitParser();
15431571

15441572
ctxt = xmlNewParserCtxt();
@@ -1555,9 +1583,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15551583
res_code = parse_xml_decl(utf8string,
15561584
&count, &version, NULL, &standalone);
15571585
if (res_code != 0)
1558-
xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1559-
"invalid XML content: invalid XML declaration",
1560-
res_code);
1586+
{
1587+
errsave(escontext,
1588+
errcode(ERRCODE_INVALID_XML_CONTENT),
1589+
errmsg_internal("invalid XML content: invalid XML declaration"),
1590+
errdetail_for_xml_code(res_code));
1591+
goto fail;
1592+
}
15611593

15621594
/* Is there a DOCTYPE element? */
15631595
if (xml_doctype_in_content(utf8string + count))
@@ -1580,20 +1612,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
15801612
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
15811613
if (doc == NULL || xmlerrcxt->err_occurred)
15821614
{
1583-
/* Use original option to decide which error code to throw */
1615+
/* Use original option to decide which error code to report */
15841616
if (xmloption_arg == XMLOPTION_DOCUMENT)
1585-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1617+
xml_errsave(escontext, xmlerrcxt,
1618+
ERRCODE_INVALID_XML_DOCUMENT,
15861619
"invalid XML document");
15871620
else
1588-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1621+
xml_errsave(escontext, xmlerrcxt,
1622+
ERRCODE_INVALID_XML_CONTENT,
15891623
"invalid XML content");
1624+
goto fail;
15901625
}
15911626
}
15921627
else
15931628
{
15941629
doc = xmlNewDoc(version);
1630+
if (doc == NULL || xmlerrcxt->err_occurred)
1631+
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1632+
"could not allocate XML document");
1633+
15951634
Assert(doc->encoding == NULL);
15961635
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1636+
if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1637+
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1638+
"could not allocate XML document");
15971639
doc->standalone = standalone;
15981640

15991641
/* allow empty content */
@@ -1602,10 +1644,17 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
16021644
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
16031645
utf8string + count, NULL);
16041646
if (res_code != 0 || xmlerrcxt->err_occurred)
1605-
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1647+
{
1648+
xml_errsave(escontext, xmlerrcxt,
1649+
ERRCODE_INVALID_XML_CONTENT,
16061650
"invalid XML content");
1651+
goto fail;
1652+
}
16071653
}
16081654
}
1655+
1656+
fail:
1657+
;
16091658
}
16101659
PG_CATCH();
16111660
{
@@ -1745,6 +1794,44 @@ xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
17451794
}
17461795

17471796

1797+
/*
1798+
* xml_errsave --- save an XML-related error
1799+
*
1800+
* If escontext is an ErrorSaveContext, error details are saved into it,
1801+
* and control returns normally.
1802+
*
1803+
* Otherwise, the error is thrown, so that this is equivalent to
1804+
* xml_ereport() with level == ERROR.
1805+
*
1806+
* This should be used only for errors that we're sure we do not need
1807+
* a transaction abort to clean up after.
1808+
*/
1809+
static void
1810+
xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
1811+
int sqlcode, const char *msg)
1812+
{
1813+
char *detail;
1814+
1815+
/* Defend against someone passing us a bogus context struct */
1816+
if (errcxt->magic != ERRCXT_MAGIC)
1817+
elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
1818+
1819+
/* Flag that the current libxml error has been reported */
1820+
errcxt->err_occurred = false;
1821+
1822+
/* Include detail only if we have some text from libxml */
1823+
if (errcxt->err_buf.len > 0)
1824+
detail = errcxt->err_buf.data;
1825+
else
1826+
detail = NULL;
1827+
1828+
errsave(escontext,
1829+
(errcode(sqlcode),
1830+
errmsg_internal("%s", msg),
1831+
detail ? errdetail_internal("%s", detail) : 0));
1832+
}
1833+
1834+
17481835
/*
17491836
* Error handler for libxml errors and warnings
17501837
*/
@@ -1917,15 +2004,16 @@ xml_errorHandler(void *data, xmlErrorPtr error)
19172004

19182005

19192006
/*
1920-
* Wrapper for "ereport" function for XML-related errors. The "msg"
1921-
* is the SQL-level message; some can be adopted from the SQL/XML
1922-
* standard. This function uses "code" to create a textual detail
1923-
* message. At the moment, we only need to cover those codes that we
2007+
* Convert libxml error codes into textual errdetail messages.
2008+
*
2009+
* This should be called within an ereport or errsave invocation,
2010+
* just as errdetail would be.
2011+
*
2012+
* At the moment, we only need to cover those codes that we
19242013
* may raise in this file.
19252014
*/
1926-
static void
1927-
xml_ereport_by_code(int level, int sqlcode,
1928-
const char *msg, int code)
2015+
static int
2016+
errdetail_for_xml_code(int code)
19292017
{
19302018
const char *det;
19312019

@@ -1954,10 +2042,7 @@ xml_ereport_by_code(int level, int sqlcode,
19542042
break;
19552043
}
19562044

1957-
ereport(level,
1958-
(errcode(sqlcode),
1959-
errmsg_internal("%s", msg),
1960-
errdetail(det, code)));
2045+
return errdetail(det, code);
19612046
}
19622047

19632048

@@ -4241,7 +4326,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
42414326
/* We want to catch any exceptions and return false */
42424327
PG_TRY();
42434328
{
4244-
doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4329+
doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(), NULL);
42454330
result = true;
42464331
}
42474332
PG_CATCH();

src/test/regress/expected/xml.out

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,37 @@ SELECT * FROM xmltest;
1818
2 | <value>two</value>
1919
(2 rows)
2020

21+
-- test non-throwing API, too
22+
SELECT pg_input_is_valid('<value>one</value>', 'xml');
23+
pg_input_is_valid
24+
-------------------
25+
t
26+
(1 row)
27+
28+
SELECT pg_input_is_valid('<value>one</', 'xml');
29+
pg_input_is_valid
30+
-------------------
31+
f
32+
(1 row)
33+
34+
SELECT pg_input_error_message('<value>one</', 'xml');
35+
pg_input_error_message
36+
------------------------
37+
invalid XML content
38+
(1 row)
39+
40+
SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
41+
pg_input_is_valid
42+
-------------------
43+
f
44+
(1 row)
45+
46+
SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
47+
pg_input_error_message
48+
----------------------------------------------
49+
invalid XML content: invalid XML declaration
50+
(1 row)
51+
2152
SELECT xmlcomment('test');
2253
xmlcomment
2354
-------------

src/test/regress/expected/xml_1.out

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,22 @@ SELECT * FROM xmltest;
2222
----+------
2323
(0 rows)
2424

25+
-- test non-throwing API, too
26+
SELECT pg_input_is_valid('<value>one</value>', 'xml');
27+
ERROR: unsupported XML feature
28+
DETAIL: This functionality requires the server to be built with libxml support.
29+
SELECT pg_input_is_valid('<value>one</', 'xml');
30+
ERROR: unsupported XML feature
31+
DETAIL: This functionality requires the server to be built with libxml support.
32+
SELECT pg_input_error_message('<value>one</', 'xml');
33+
ERROR: unsupported XML feature
34+
DETAIL: This functionality requires the server to be built with libxml support.
35+
SELECT pg_input_is_valid('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
36+
ERROR: unsupported XML feature
37+
DETAIL: This functionality requires the server to be built with libxml support.
38+
SELECT pg_input_error_message('<?xml version="1.0" standalone="y"?><foo/>', 'xml');
39+
ERROR: unsupported XML feature
40+
DETAIL: This functionality requires the server to be built with libxml support.
2541
SELECT xmlcomment('test');
2642
ERROR: unsupported XML feature
2743
DETAIL: This functionality requires the server to be built with libxml support.

0 commit comments

Comments
 (0)