Skip to content

Commit 251cf2e

Browse files
committed
Fix minor deficiencies in XMLTABLE, xpath(), xmlexists()
Correctly process nodes of more types than previously. In some cases, nodes were being ignored (nothing was output); in other cases, trying to return them resulted in errors about unrecognized nodes. In yet other cases, necessary escaping (of XML special characters) was not being done. Fix all those (as far as the authors could find) and add regression tests cases verifying the new behavior. I (Álvaro) was of two minds about backpatching these changes. They do seem bugfixes that would benefit most users of the affected functions; but on the other hand it would change established behavior in minor releases, so it seems prudent not to. Authors: Pavel Stehule, Markus Winand, Chapman Flack Discussion: https://postgr.es/m/CAFj8pRA6J25CtAZ2TuRvxK3gat7-bBUYh0rfE2yM7Hj9GD14Dg@mail.gmail.com https://postgr.es/m/8BDB0627-2105-4564-AA76-7849F028B96E@winand.at The elephant in the room as pointed out by Chapman Flack, not fixed in this commit, is that we still have XMLTABLE operating on XPath 1.0 instead of the standard-mandated XQuery (or even its subset XPath 2.0). Fixing that is a major undertaking, however.
1 parent 1d33858 commit 251cf2e

File tree

6 files changed

+164
-78
lines changed

6 files changed

+164
-78
lines changed

doc/src/sgml/func.sgml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11010,9 +11010,9 @@ $$ AS data;
1101011010

1101111011
SELECT xmltable.*
1101211012
FROM xmlelements, XMLTABLE('/root' PASSING data COLUMNS element text);
11013-
element
11014-
----------------------
11015-
Hello2a2 bbbCC
11013+
element
11014+
-------------------------
11015+
Hello2a2 bbbxxxCC
1101611016
]]></screen>
1101711017
</para>
1101811018

src/backend/utils/adt/xml.c

Lines changed: 86 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,36 @@ pg_xmlCharStrndup(const char *str, size_t len)
11761176
return result;
11771177
}
11781178

1179+
/*
1180+
* Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1181+
*
1182+
* The input xmlChar is freed regardless of success of the copy.
1183+
*/
1184+
static char *
1185+
xml_pstrdup_and_free(xmlChar *str)
1186+
{
1187+
char *result;
1188+
1189+
if (str)
1190+
{
1191+
PG_TRY();
1192+
{
1193+
result = pstrdup((char *) str);
1194+
}
1195+
PG_CATCH();
1196+
{
1197+
xmlFree(str);
1198+
PG_RE_THROW();
1199+
}
1200+
PG_END_TRY();
1201+
xmlFree(str);
1202+
}
1203+
else
1204+
result = NULL;
1205+
1206+
return result;
1207+
}
1208+
11791209
/*
11801210
* str is the null-terminated input string. Remaining arguments are
11811211
* output arguments; each can be NULL if value is not wanted.
@@ -3678,15 +3708,17 @@ SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
36783708
#ifdef USE_LIBXML
36793709

36803710
/*
3681-
* Convert XML node to text (dump subtree in case of element,
3682-
* return value otherwise)
3711+
* Convert XML node to text.
3712+
*
3713+
* For attribute and text nodes, return the escaped text. For anything else,
3714+
* dump the whole subtree.
36833715
*/
36843716
static text *
36853717
xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
36863718
{
36873719
xmltype *result;
36883720

3689-
if (cur->type == XML_ELEMENT_NODE)
3721+
if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
36903722
{
36913723
xmlBufferPtr buf;
36923724
xmlNodePtr cur_copy;
@@ -4479,9 +4511,9 @@ XmlTableGetValue(TableFuncScanState *state, int colnum,
44794511
/*
44804512
* There are four possible cases, depending on the number of nodes
44814513
* returned by the XPath expression and the type of the target column:
4482-
* a) XPath returns no nodes. b) One node is returned, and column is
4483-
* of type XML. c) One node, column type other than XML. d) Multiple
4484-
* nodes are returned.
4514+
* a) XPath returns no nodes. b) The target type is XML (return all
4515+
* as XML). For non-XML return types: c) One node (return content).
4516+
* d) Multiple nodes (error).
44854517
*/
44864518
if (xpathobj->type == XPATH_NODESET)
44874519
{
@@ -4494,84 +4526,69 @@ XmlTableGetValue(TableFuncScanState *state, int colnum,
44944526
{
44954527
*isnull = true;
44964528
}
4497-
else if (count == 1 && typid == XMLOID)
4498-
{
4499-
text *textstr;
4500-
4501-
/* simple case, result is one value */
4502-
textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
4503-
xtCxt->xmlerrcxt);
4504-
cstr = text_to_cstring(textstr);
4505-
}
4506-
else if (count == 1)
4529+
else
45074530
{
4508-
xmlChar *str;
4509-
xmlNodePtr node;
4510-
4511-
/*
4512-
* Most nodes (elements and even attributes) store their data
4513-
* in children nodes. If they don't have children nodes, it
4514-
* means that they are empty (e.g. <element/>). Text nodes and
4515-
* CDATA sections are an exception: they don't have children
4516-
* but have content in the Text/CDATA node itself.
4517-
*/
4518-
node = xpathobj->nodesetval->nodeTab[0];
4519-
if (node->type != XML_CDATA_SECTION_NODE &&
4520-
node->type != XML_TEXT_NODE)
4521-
node = node->xmlChildrenNode;
4522-
4523-
str = xmlNodeListGetString(xtCxt->doc, node, 1);
4524-
if (str != NULL)
4531+
if (typid == XMLOID)
45254532
{
4526-
PG_TRY();
4527-
{
4528-
cstr = pstrdup((char *) str);
4529-
}
4530-
PG_CATCH();
4533+
text *textstr;
4534+
StringInfoData str;
4535+
4536+
/* Concatenate serialized values */
4537+
initStringInfo(&str);
4538+
for (int i = 0; i < count; i++)
45314539
{
4532-
xmlFree(str);
4533-
PG_RE_THROW();
4540+
textstr =
4541+
xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4542+
xtCxt->xmlerrcxt);
4543+
4544+
appendStringInfoText(&str, textstr);
45344545
}
4535-
PG_END_TRY();
4536-
xmlFree(str);
4546+
cstr = str.data;
45374547
}
45384548
else
45394549
{
4540-
/* Ensure mapping of empty tags to PostgreSQL values. */
4541-
cstr = "";
4550+
xmlChar *str;
4551+
4552+
if (count > 1)
4553+
ereport(ERROR,
4554+
(errcode(ERRCODE_CARDINALITY_VIOLATION),
4555+
errmsg("more than one value returned by column XPath expression")));
4556+
4557+
str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4558+
cstr = str ? xml_pstrdup_and_free(str) : "";
45424559
}
45434560
}
4561+
}
4562+
else if (xpathobj->type == XPATH_STRING)
4563+
{
4564+
/* Content should be escaped when target will be XML */
4565+
if (typid == XMLOID)
4566+
cstr = escape_xml((char *) xpathobj->stringval);
45444567
else
4545-
{
4546-
StringInfoData str;
4547-
int i;
4568+
cstr = (char *) xpathobj->stringval;
4569+
}
4570+
else if (xpathobj->type == XPATH_BOOLEAN)
4571+
{
4572+
char typcategory;
4573+
bool typispreferred;
4574+
xmlChar *str;
45484575

4549-
Assert(count > 1);
4576+
/* Allow implicit casting from boolean to numbers */
4577+
get_type_category_preferred(typid, &typcategory, &typispreferred);
45504578

4551-
/*
4552-
* When evaluating the XPath expression returns multiple
4553-
* nodes, the result is the concatenation of them all. The
4554-
* target type must be XML.
4555-
*/
4556-
if (typid != XMLOID)
4557-
ereport(ERROR,
4558-
(errcode(ERRCODE_CARDINALITY_VIOLATION),
4559-
errmsg("more than one value returned by column XPath expression")));
4579+
if (typcategory != TYPCATEGORY_NUMERIC)
4580+
str = xmlXPathCastBooleanToString(xpathobj->boolval);
4581+
else
4582+
str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
45604583

4561-
/* Concatenate serialized values */
4562-
initStringInfo(&str);
4563-
for (i = 0; i < count; i++)
4564-
{
4565-
appendStringInfoText(&str,
4566-
xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4567-
xtCxt->xmlerrcxt));
4568-
}
4569-
cstr = str.data;
4570-
}
4584+
cstr = xml_pstrdup_and_free(str);
45714585
}
4572-
else if (xpathobj->type == XPATH_STRING)
4586+
else if (xpathobj->type == XPATH_NUMBER)
45734587
{
4574-
cstr = (char *) xpathobj->stringval;
4588+
xmlChar *str;
4589+
4590+
str = xmlXPathCastNumberToString(xpathobj->floatval);
4591+
cstr = xml_pstrdup_and_free(str);
45754592
}
45764593
else
45774594
elog(ERROR, "unexpected XPath object type %u", xpathobj->type);

src/test/regress/expected/xml.out

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,9 +1210,9 @@ SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan"
12101210
(2 rows)
12111211

12121212
SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z--> bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text);
1213-
element
1214-
-------------------
1215-
a1aa2a bbbbcccc
1213+
element
1214+
----------------------
1215+
a1aa2a bbbbxxxcccc
12161216
(1 row)
12171217

12181218
SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z--> bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text PATH 'element/text()'); -- should fail
@@ -1493,3 +1493,24 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
14931493
14
14941494
(4 rows)
14951495

1496+
-- XPath result can be boolean or number too
1497+
SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
1498+
a | b | c | d | e
1499+
----------+---+----+---+---
1500+
<a>a</a> | a | hi | t | 1
1501+
(1 row)
1502+
1503+
\x
1504+
SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
1505+
-[ RECORD 1 ]-----------------------------------------------------------
1506+
x | pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post
1507+
y | <e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>+
1508+
|
1509+
1510+
\x
1511+
SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
1512+
a | b
1513+
--------+--------------
1514+
<foo/> | &lt;foo/&gt;
1515+
(1 row)
1516+

src/test/regress/expected/xml_1.out

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,3 +1343,22 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
13431343
---
13441344
(0 rows)
13451345

1346+
-- XPath result can be boolean or number too
1347+
SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
1348+
ERROR: unsupported XML feature
1349+
LINE 1: SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml ...
1350+
^
1351+
DETAIL: This functionality requires the server to be built with libxml support.
1352+
HINT: You need to rebuild PostgreSQL using --with-libxml.
1353+
\x
1354+
SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
1355+
ERROR: unsupported XML feature
1356+
LINE 1: SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?...
1357+
^
1358+
DETAIL: This functionality requires the server to be built with libxml support.
1359+
HINT: You need to rebuild PostgreSQL using --with-libxml.
1360+
\x
1361+
SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
1362+
ERROR: unsupported XML feature
1363+
DETAIL: This functionality requires the server to be built with libxml support.
1364+
HINT: You need to rebuild PostgreSQL using --with-libxml.

src/test/regress/expected/xml_2.out

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,9 +1190,9 @@ SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan"
11901190
(2 rows)
11911191

11921192
SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z--> bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text);
1193-
element
1194-
-------------------
1195-
a1aa2a bbbbcccc
1193+
element
1194+
----------------------
1195+
a1aa2a bbbbxxxcccc
11961196
(1 row)
11971197

11981198
SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z--> bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text PATH 'element/text()'); -- should fail
@@ -1473,3 +1473,24 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
14731473
14
14741474
(4 rows)
14751475

1476+
-- XPath result can be boolean or number too
1477+
SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
1478+
a | b | c | d | e
1479+
----------+---+----+---+---
1480+
<a>a</a> | a | hi | t | 1
1481+
(1 row)
1482+
1483+
\x
1484+
SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
1485+
-[ RECORD 1 ]-----------------------------------------------------------
1486+
x | pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post
1487+
y | <e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>+
1488+
|
1489+
1490+
\x
1491+
SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
1492+
a | b
1493+
--------+--------------
1494+
<foo/> | &lt;foo/&gt;
1495+
(1 row)
1496+

src/test/regress/sql/xml.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,3 +595,11 @@ INSERT INTO xmltest2 VALUES('<d><r><dc>2</dc></r></d>', 'D');
595595
SELECT xmltable.* FROM xmltest2, LATERAL xmltable('/d/r' PASSING x COLUMNS a int PATH '' || lower(_path) || 'c');
596596
SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH '.');
597597
SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH 'x' DEFAULT ascii(_path) - 54);
598+
599+
-- XPath result can be boolean or number too
600+
SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
601+
\x
602+
SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
603+
\x
604+
605+
SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');

0 commit comments

Comments
 (0)