@@ -119,9 +119,10 @@ struct PgXmlErrorContext
119
119
120
120
static xmlParserInputPtr xmlPgEntityLoader (const char * URL , const char * ID ,
121
121
xmlParserCtxtPtr ctxt );
122
+ static void xml_errsave (Node * escontext , PgXmlErrorContext * errcxt ,
123
+ int sqlcode , const char * msg );
122
124
static void xml_errorHandler (void * data , xmlErrorPtr error );
123
- static void xml_ereport_by_code (int level , int sqlcode ,
124
- const char * msg , int code );
125
+ static int errdetail_for_xml_code (int code );
125
126
static void chopStringInfoNewlines (StringInfo str );
126
127
static void appendStringInfoLineSeparator (StringInfo str );
127
128
@@ -143,7 +144,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143
144
pg_enc encoding , int standalone );
144
145
static bool xml_doctype_in_content (const xmlChar * str );
145
146
static xmlDocPtr xml_parse (text * data , XmlOptionType xmloption_arg ,
146
- bool preserve_whitespace , int encoding );
147
+ bool preserve_whitespace , int encoding ,
148
+ Node * escontext );
147
149
static text * xml_xmlnodetoxmltype (xmlNodePtr cur , PgXmlErrorContext * xmlerrcxt );
148
150
static int xml_xpathobjtoxmlarray (xmlXPathObjectPtr xpathobj ,
149
151
ArrayBuildState * astate ,
@@ -261,14 +263,18 @@ xml_in(PG_FUNCTION_ARGS)
261
263
xmltype * vardata ;
262
264
xmlDocPtr doc ;
263
265
266
+ /* Build the result object. */
264
267
vardata = (xmltype * ) cstring_to_text (s );
265
268
266
269
/*
267
- * Parse the data to check if it is well-formed XML data. Assume that
268
- * ERROR occurred if parsing failed.
270
+ * Parse the data to check if it is well-formed XML data.
271
+ *
272
+ * Note: we don't need to worry about whether a soft error is detected.
269
273
*/
270
- doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding ());
271
- xmlFreeDoc (doc );
274
+ doc = xml_parse (vardata , xmloption , true, GetDatabaseEncoding (),
275
+ fcinfo -> context );
276
+ if (doc != NULL )
277
+ xmlFreeDoc (doc );
272
278
273
279
PG_RETURN_XML_P (vardata );
274
280
#else
@@ -323,9 +329,10 @@ xml_out_internal(xmltype *x, pg_enc target_encoding)
323
329
return buf .data ;
324
330
}
325
331
326
- xml_ereport_by_code (WARNING , ERRCODE_INTERNAL_ERROR ,
327
- "could not parse XML declaration in stored value" ,
328
- res_code );
332
+ ereport (WARNING ,
333
+ errcode (ERRCODE_INTERNAL_ERROR ),
334
+ errmsg_internal ("could not parse XML declaration in stored value" ),
335
+ errdetail_for_xml_code (res_code ));
329
336
#endif
330
337
return str ;
331
338
}
@@ -392,7 +399,7 @@ xml_recv(PG_FUNCTION_ARGS)
392
399
* Parse the data to check if it is well-formed XML data. Assume that
393
400
* xml_parse will throw ERROR if not.
394
401
*/
395
- doc = xml_parse (result , xmloption , true, encoding );
402
+ doc = xml_parse (result , xmloption , true, encoding , NULL );
396
403
xmlFreeDoc (doc );
397
404
398
405
/* Now that we know what we're dealing with, convert to server encoding */
@@ -754,7 +761,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
754
761
xmlDocPtr doc ;
755
762
756
763
doc = xml_parse (data , xmloption_arg , preserve_whitespace ,
757
- GetDatabaseEncoding ());
764
+ GetDatabaseEncoding (), NULL );
758
765
xmlFreeDoc (doc );
759
766
760
767
return (xmltype * ) data ;
@@ -895,7 +902,7 @@ xml_is_document(xmltype *arg)
895
902
PG_TRY ();
896
903
{
897
904
doc = xml_parse ((text * ) arg , XMLOPTION_DOCUMENT , true,
898
- GetDatabaseEncoding ());
905
+ GetDatabaseEncoding (), NULL );
899
906
result = true;
900
907
}
901
908
PG_CATCH ();
@@ -1500,17 +1507,26 @@ xml_doctype_in_content(const xmlChar *str)
1500
1507
1501
1508
1502
1509
/*
1503
- * Convert a C string to XML internal representation
1510
+ * Convert a text object to XML internal representation
1511
+ *
1512
+ * data is the source data (must not be toasted!), encoding is its encoding,
1513
+ * and xmloption_arg and preserve_whitespace are options for the
1514
+ * transformation.
1515
+ *
1516
+ * Errors normally result in ereport(ERROR), but if escontext is an
1517
+ * ErrorSaveContext, then "safe" errors are reported there instead, and the
1518
+ * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1504
1519
*
1505
1520
* Note: it is caller's responsibility to xmlFreeDoc() the result,
1506
- * else a permanent memory leak will ensue!
1521
+ * else a permanent memory leak will ensue! But note the result could
1522
+ * be NULL after a soft error.
1507
1523
*
1508
1524
* TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1509
1525
* yet do not use SAX - see xmlreader.c)
1510
1526
*/
1511
1527
static xmlDocPtr
1512
1528
xml_parse (text * data , XmlOptionType xmloption_arg , bool preserve_whitespace ,
1513
- int encoding )
1529
+ int encoding , Node * escontext )
1514
1530
{
1515
1531
int32 len ;
1516
1532
xmlChar * string ;
@@ -1519,9 +1535,20 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1519
1535
volatile xmlParserCtxtPtr ctxt = NULL ;
1520
1536
volatile xmlDocPtr doc = NULL ;
1521
1537
1538
+ /*
1539
+ * This step looks annoyingly redundant, but we must do it to have a
1540
+ * null-terminated string in case encoding conversion isn't required.
1541
+ */
1522
1542
len = VARSIZE_ANY_EXHDR (data ); /* will be useful later */
1523
1543
string = xml_text2xmlChar (data );
1524
1544
1545
+ /*
1546
+ * If the data isn't UTF8, we must translate before giving it to libxml.
1547
+ *
1548
+ * XXX ideally, we'd catch any encoding conversion failure and return a
1549
+ * soft error. However, failure to convert to UTF8 should be pretty darn
1550
+ * rare, so for now this is left undone.
1551
+ */
1525
1552
utf8string = pg_do_encoding_conversion (string ,
1526
1553
len ,
1527
1554
encoding ,
@@ -1539,6 +1566,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1539
1566
xmlChar * version = NULL ;
1540
1567
int standalone = 0 ;
1541
1568
1569
+ /* Any errors here are reported as hard ereport's */
1542
1570
xmlInitParser ();
1543
1571
1544
1572
ctxt = xmlNewParserCtxt ();
@@ -1555,9 +1583,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1555
1583
res_code = parse_xml_decl (utf8string ,
1556
1584
& count , & version , NULL , & standalone );
1557
1585
if (res_code != 0 )
1558
- xml_ereport_by_code (ERROR , ERRCODE_INVALID_XML_CONTENT ,
1559
- "invalid XML content: invalid XML declaration" ,
1560
- res_code );
1586
+ {
1587
+ errsave (escontext ,
1588
+ errcode (ERRCODE_INVALID_XML_CONTENT ),
1589
+ errmsg_internal ("invalid XML content: invalid XML declaration" ),
1590
+ errdetail_for_xml_code (res_code ));
1591
+ goto fail ;
1592
+ }
1561
1593
1562
1594
/* Is there a DOCTYPE element? */
1563
1595
if (xml_doctype_in_content (utf8string + count ))
@@ -1580,20 +1612,30 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1580
1612
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS ));
1581
1613
if (doc == NULL || xmlerrcxt -> err_occurred )
1582
1614
{
1583
- /* Use original option to decide which error code to throw */
1615
+ /* Use original option to decide which error code to report */
1584
1616
if (xmloption_arg == XMLOPTION_DOCUMENT )
1585
- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_DOCUMENT ,
1617
+ xml_errsave (escontext , xmlerrcxt ,
1618
+ ERRCODE_INVALID_XML_DOCUMENT ,
1586
1619
"invalid XML document" );
1587
1620
else
1588
- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_CONTENT ,
1621
+ xml_errsave (escontext , xmlerrcxt ,
1622
+ ERRCODE_INVALID_XML_CONTENT ,
1589
1623
"invalid XML content" );
1624
+ goto fail ;
1590
1625
}
1591
1626
}
1592
1627
else
1593
1628
{
1594
1629
doc = xmlNewDoc (version );
1630
+ if (doc == NULL || xmlerrcxt -> err_occurred )
1631
+ xml_ereport (xmlerrcxt , ERROR , ERRCODE_OUT_OF_MEMORY ,
1632
+ "could not allocate XML document" );
1633
+
1595
1634
Assert (doc -> encoding == NULL );
1596
1635
doc -> encoding = xmlStrdup ((const xmlChar * ) "UTF-8" );
1636
+ if (doc -> encoding == NULL || xmlerrcxt -> err_occurred )
1637
+ xml_ereport (xmlerrcxt , ERROR , ERRCODE_OUT_OF_MEMORY ,
1638
+ "could not allocate XML document" );
1597
1639
doc -> standalone = standalone ;
1598
1640
1599
1641
/* allow empty content */
@@ -1602,10 +1644,17 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1602
1644
res_code = xmlParseBalancedChunkMemory (doc , NULL , NULL , 0 ,
1603
1645
utf8string + count , NULL );
1604
1646
if (res_code != 0 || xmlerrcxt -> err_occurred )
1605
- xml_ereport (xmlerrcxt , ERROR , ERRCODE_INVALID_XML_CONTENT ,
1647
+ {
1648
+ xml_errsave (escontext , xmlerrcxt ,
1649
+ ERRCODE_INVALID_XML_CONTENT ,
1606
1650
"invalid XML content" );
1651
+ goto fail ;
1652
+ }
1607
1653
}
1608
1654
}
1655
+
1656
+ fail :
1657
+ ;
1609
1658
}
1610
1659
PG_CATCH ();
1611
1660
{
@@ -1745,6 +1794,44 @@ xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1745
1794
}
1746
1795
1747
1796
1797
+ /*
1798
+ * xml_errsave --- save an XML-related error
1799
+ *
1800
+ * If escontext is an ErrorSaveContext, error details are saved into it,
1801
+ * and control returns normally.
1802
+ *
1803
+ * Otherwise, the error is thrown, so that this is equivalent to
1804
+ * xml_ereport() with level == ERROR.
1805
+ *
1806
+ * This should be used only for errors that we're sure we do not need
1807
+ * a transaction abort to clean up after.
+ *
+ * errcxt is the PgXmlErrorContext holding libxml's error state: its
+ * err_occurred flag is cleared here, and any message text accumulated in
+ * its err_buf is attached to the report as the errdetail.  sqlcode is the
+ * error code to report, and msg is the primary message, which is emitted
+ * verbatim through a "%s" format.
+ *
+ * An errcxt whose magic field is not ERRCXT_MAGIC is reported with
+ * elog(ERROR), regardless of what escontext is.
1808
+ */
1809
+ static void
1810
+ xml_errsave (Node * escontext , PgXmlErrorContext * errcxt ,
1811
+ int sqlcode , const char * msg )
1812
+ {
1813
+ char * detail ;
1814
+
1815
+ /* Defend against someone passing us a bogus context struct */
1816
+ if (errcxt -> magic != ERRCXT_MAGIC )
1817
+ elog (ERROR , "xml_errsave called with invalid PgXmlErrorContext" );
1818
+
1819
+ /* Flag that the current libxml error has been reported */
1820
+ errcxt -> err_occurred = false;
1821
+
1822
+ /* Include detail only if we have some text from libxml; else pass a bare 0 */
1823
+ if (errcxt -> err_buf .len > 0 )
1824
+ detail = errcxt -> err_buf .data ;
1825
+ else
1826
+ detail = NULL ;
1827
+
1828
+ errsave (escontext ,
1829
+ (errcode (sqlcode ),
1830
+ errmsg_internal ("%s" , msg ),
1831
+ detail ? errdetail_internal ("%s" , detail ) : 0 ));
1832
+ }
1833
+
1834
+
1748
1835
/*
1749
1836
* Error handler for libxml errors and warnings
1750
1837
*/
@@ -1917,15 +2004,16 @@ xml_errorHandler(void *data, xmlErrorPtr error)
1917
2004
1918
2005
1919
2006
/*
1920
- * Wrapper for "ereport" function for XML-related errors. The "msg"
1921
- * is the SQL-level message; some can be adopted from the SQL/XML
1922
- * standard. This function uses "code" to create a textual detail
1923
- * message. At the moment, we only need to cover those codes that we
2007
+ * Convert libxml error codes into textual errdetail messages.
2008
+ *
2009
+ * This should be called within an ereport or errsave invocation,
2010
+ * just as errdetail would be.
2011
+ *
2012
+ * At the moment, we only need to cover those codes that we
1924
2013
* may raise in this file.
1925
2014
*/
1926
- static void
1927
- xml_ereport_by_code (int level , int sqlcode ,
1928
- const char * msg , int code )
2015
+ static int
2016
+ errdetail_for_xml_code (int code )
1929
2017
{
1930
2018
const char * det ;
1931
2019
@@ -1954,10 +2042,7 @@ xml_ereport_by_code(int level, int sqlcode,
1954
2042
break ;
1955
2043
}
1956
2044
1957
- ereport (level ,
1958
- (errcode (sqlcode ),
1959
- errmsg_internal ("%s" , msg ),
1960
- errdetail (det , code )));
2045
+ return errdetail (det , code );
1961
2046
}
1962
2047
1963
2048
@@ -4241,7 +4326,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
4241
4326
/* We want to catch any exceptions and return false */
4242
4327
PG_TRY ();
4243
4328
{
4244
- doc = xml_parse (data , xmloption_arg , true, GetDatabaseEncoding ());
4329
+ doc = xml_parse (data , xmloption_arg , true, GetDatabaseEncoding (), NULL );
4245
4330
result = true;
4246
4331
}
4247
4332
PG_CATCH ();
0 commit comments